linux下暂时禁用tesseract-ocr
This commit is contained in:
parent
080915ddd2
commit
6f9d5de778
|
@ -29,8 +29,6 @@
|
|||
<Add option="../../../third_party/opencv/uos/amd64/lib/libittnotify.a" />
|
||||
<Add option="../../../third_party/opencv/uos/amd64/lib/libzlib.a" />
|
||||
<Add option="../../../third_party/freetype/uos/amd64/lib/libfreetype.a" />
|
||||
<Add option="../../../third_party/ocr/tesseract-ocr/uos/amd64/lib/libtesseract.a" />
|
||||
<Add option="../../../third_party/leptonica/uos/amd64/lib/libleptonica.a" />
|
||||
<Add option="-L../HGBase/bin/uos_x86_64_Debug -lHGBase" />
|
||||
<Add option="-L../HGImgFmt/bin/uos_x86_64_Debug -lHGImgFmt" />
|
||||
<Add option="-ldl" />
|
||||
|
@ -61,8 +59,6 @@
|
|||
<Add option="../../../third_party/opencv/uos/amd64/lib/libittnotify.a" />
|
||||
<Add option="../../../third_party/opencv/uos/amd64/lib/libzlib.a" />
|
||||
<Add option="../../../third_party/freetype/uos/amd64/lib/libfreetype.a" />
|
||||
<Add option="../../../third_party/ocr/tesseract-ocr/uos/amd64/lib/libtesseract.a" />
|
||||
<Add option="../../../third_party/leptonica/uos/amd64/lib/libleptonica.a" />
|
||||
<Add option="-L../../../../release/uos/x86_64 -lHGBase -lHGImgFmt" />
|
||||
<Add option="-ldl" />
|
||||
<Add option="-lpthread" />
|
||||
|
@ -495,8 +491,6 @@
|
|||
<Unit filename="../../../modules/imgproc/HGOCRHanvon.hpp" />
|
||||
<Unit filename="../../../modules/imgproc/HGOCRRetImpl.cpp" />
|
||||
<Unit filename="../../../modules/imgproc/HGOCRRetImpl.hpp" />
|
||||
<Unit filename="../../../modules/imgproc/HGOCRTesseract.cpp" />
|
||||
<Unit filename="../../../modules/imgproc/HGOCRTesseract.hpp" />
|
||||
<Unit filename="../../../modules/imgproc/ImageProcess/ImageApply.cpp" />
|
||||
<Unit filename="../../../modules/imgproc/ImageProcess/ImageApply.h" />
|
||||
<Unit filename="../../../modules/imgproc/ImageProcess/ImageApplyAdjustColors.cpp" />
|
||||
|
|
|
@ -1,7 +1,9 @@
|
|||
#include "HGOCR.h"
|
||||
#include "HGOCRBase.hpp"
|
||||
#include "HGOCRHanvon.hpp"
|
||||
#if defined(HG_CMP_MSC)
|
||||
#include "HGOCRTesseract.hpp"
|
||||
#endif
|
||||
#include "HGOCRRetImpl.hpp"
|
||||
|
||||
HGResult HGAPI HGImgProc_CreateOCRMgr(HGUInt algo, HGOCRMgr* ocrMgr)
|
||||
|
@ -13,6 +15,7 @@ HGResult HGAPI HGImgProc_CreateOCRMgr(HGUInt algo, HGOCRMgr* ocrMgr)
|
|||
|
||||
if (HGIMGPROC_OCRALGO_DEFAULT == algo)
|
||||
{
|
||||
#if defined(HG_CMP_MSC)
|
||||
HGOCRBase* ocrMgrImpl = new HGOCRHanvon;
|
||||
HGResult ret = ocrMgrImpl->Init();
|
||||
if (HGBASE_ERR_OK != ret)
|
||||
|
@ -26,6 +29,15 @@ HGResult HGAPI HGImgProc_CreateOCRMgr(HGUInt algo, HGOCRMgr* ocrMgr)
|
|||
return ret;
|
||||
}
|
||||
}
|
||||
#else
|
||||
HGOCRBase* ocrMgrImpl = new HGOCRHanvon;
|
||||
HGResult ret = ocrMgrImpl->Init();
|
||||
if (HGBASE_ERR_OK != ret)
|
||||
{
|
||||
delete ocrMgrImpl;
|
||||
return ret;
|
||||
}
|
||||
#endif
|
||||
|
||||
*ocrMgr = (HGOCRMgr)ocrMgrImpl;
|
||||
return HGBASE_ERR_OK;
|
||||
|
@ -45,6 +57,7 @@ HGResult HGAPI HGImgProc_CreateOCRMgr(HGUInt algo, HGOCRMgr* ocrMgr)
|
|||
}
|
||||
else if (HGIMGPROC_OCRALGO_TESSERACT == algo)
|
||||
{
|
||||
#if defined(HG_CMP_MSC)
|
||||
HGOCRBase* ocrMgrImpl = new HGOCRTesseract;
|
||||
HGResult ret = ocrMgrImpl->Init();
|
||||
if (HGBASE_ERR_OK != ret)
|
||||
|
@ -55,6 +68,10 @@ HGResult HGAPI HGImgProc_CreateOCRMgr(HGUInt algo, HGOCRMgr* ocrMgr)
|
|||
|
||||
*ocrMgr = (HGOCRMgr)ocrMgrImpl;
|
||||
return HGBASE_ERR_OK;
|
||||
#else
|
||||
return HGBASE_ERR_INVALIDARG;
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
return HGBASE_ERR_INVALIDARG;
|
||||
|
|
|
@ -1,812 +0,0 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: baseapi.h
|
||||
// Description: Simple API for calling tesseract.
|
||||
// Author: Ray Smith
|
||||
//
|
||||
// (C) Copyright 2006, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef TESSERACT_API_BASEAPI_H_
|
||||
#define TESSERACT_API_BASEAPI_H_
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
# include "config_auto.h" // DISABLED_LEGACY_ENGINE
|
||||
#endif
|
||||
|
||||
#include "export.h"
|
||||
#include "pageiterator.h"
|
||||
#include "publictypes.h"
|
||||
#include "resultiterator.h"
|
||||
#include "unichar.h"
|
||||
|
||||
#include "version.h"
|
||||
|
||||
#include <cstdio>
|
||||
#include <vector> // for std::vector
|
||||
|
||||
struct Pix;
|
||||
struct Pixa;
|
||||
struct Boxa;
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
class PAGE_RES;
|
||||
class ParagraphModel;
|
||||
class BLOCK_LIST;
|
||||
class ETEXT_DESC;
|
||||
struct OSResults;
|
||||
class UNICHARSET;
|
||||
|
||||
class Dawg;
|
||||
class Dict;
|
||||
class EquationDetect;
|
||||
class PageIterator;
|
||||
class ImageThresholder;
|
||||
class LTRResultIterator;
|
||||
class ResultIterator;
|
||||
class MutableIterator;
|
||||
class TessResultRenderer;
|
||||
class Tesseract;
|
||||
|
||||
// Function to read a std::vector<char> from a whole file.
|
||||
// Returns false on failure.
|
||||
using FileReader = bool (*)(const char *filename, std::vector<char> *data);
|
||||
|
||||
using DictFunc = int (Dict::*)(void *, const UNICHARSET &, UNICHAR_ID,
|
||||
bool) const;
|
||||
using ProbabilityInContextFunc = double (Dict::*)(const char *, const char *,
|
||||
int, const char *, int);
|
||||
|
||||
/**
|
||||
* Base class for all tesseract APIs.
|
||||
* Specific classes can add ability to work on different inputs or produce
|
||||
* different outputs.
|
||||
* This class is mostly an interface layer on top of the Tesseract instance
|
||||
* class to hide the data types so that users of this class don't have to
|
||||
* include any other Tesseract headers.
|
||||
*/
|
||||
class TESS_API TessBaseAPI {
|
||||
public:
|
||||
TessBaseAPI();
|
||||
virtual ~TessBaseAPI();
|
||||
// Copy constructor and assignment operator are currently unsupported.
|
||||
TessBaseAPI(TessBaseAPI const &) = delete;
|
||||
TessBaseAPI &operator=(TessBaseAPI const &) = delete;
|
||||
|
||||
/**
|
||||
* Returns the version identifier as a static string. Do not delete.
|
||||
*/
|
||||
static const char *Version();
|
||||
|
||||
/**
|
||||
* If compiled with OpenCL AND an available OpenCL
|
||||
* device is deemed faster than serial code, then
|
||||
* "device" is populated with the cl_device_id
|
||||
* and returns sizeof(cl_device_id)
|
||||
* otherwise *device=nullptr and returns 0.
|
||||
*/
|
||||
static size_t getOpenCLDevice(void **device);
|
||||
|
||||
/**
|
||||
* Set the name of the input file. Needed for training and
|
||||
* reading a UNLV zone file, and for searchable PDF output.
|
||||
*/
|
||||
void SetInputName(const char *name);
|
||||
/**
|
||||
* These functions are required for searchable PDF output.
|
||||
* We need our hands on the input file so that we can include
|
||||
* it in the PDF without transcoding. If that is not possible,
|
||||
* we need the original image. Finally, resolution metadata
|
||||
* is stored in the PDF so we need that as well.
|
||||
*/
|
||||
const char *GetInputName();
|
||||
// Takes ownership of the input pix.
|
||||
void SetInputImage(Pix *pix);
|
||||
Pix *GetInputImage();
|
||||
int GetSourceYResolution();
|
||||
const char *GetDatapath();
|
||||
|
||||
/** Set the name of the bonus output files. Needed only for debugging. */
|
||||
void SetOutputName(const char *name);
|
||||
|
||||
/**
|
||||
* Set the value of an internal "parameter."
|
||||
* Supply the name of the parameter and the value as a string, just as
|
||||
* you would in a config file.
|
||||
* Returns false if the name lookup failed.
|
||||
* Eg SetVariable("tessedit_char_blacklist", "xyz"); to ignore x, y and z.
|
||||
* Or SetVariable("classify_bln_numeric_mode", "1"); to set numeric-only mode.
|
||||
* SetVariable may be used before Init, but settings will revert to
|
||||
* defaults on End().
|
||||
*
|
||||
* Note: Must be called after Init(). Only works for non-init variables
|
||||
* (init variables should be passed to Init()).
|
||||
*/
|
||||
bool SetVariable(const char *name, const char *value);
|
||||
bool SetDebugVariable(const char *name, const char *value);
|
||||
|
||||
/**
|
||||
* Returns true if the parameter was found among Tesseract parameters.
|
||||
* Fills in value with the value of the parameter.
|
||||
*/
|
||||
bool GetIntVariable(const char *name, int *value) const;
|
||||
bool GetBoolVariable(const char *name, bool *value) const;
|
||||
bool GetDoubleVariable(const char *name, double *value) const;
|
||||
|
||||
/**
|
||||
* Returns the pointer to the string that represents the value of the
|
||||
* parameter if it was found among Tesseract parameters.
|
||||
*/
|
||||
const char *GetStringVariable(const char *name) const;
|
||||
|
||||
#ifndef DISABLED_LEGACY_ENGINE
|
||||
|
||||
/**
|
||||
* Print Tesseract fonts table to the given file.
|
||||
*/
|
||||
void PrintFontsTable(FILE *fp) const;
|
||||
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Print Tesseract parameters to the given file.
|
||||
*/
|
||||
void PrintVariables(FILE *fp) const;
|
||||
|
||||
/**
|
||||
* Get value of named variable as a string, if it exists.
|
||||
*/
|
||||
bool GetVariableAsString(const char *name, std::string *val) const;
|
||||
|
||||
/**
|
||||
* Instances are now mostly thread-safe and totally independent,
|
||||
* but some global parameters remain. Basically it is safe to use multiple
|
||||
* TessBaseAPIs in different threads in parallel, UNLESS:
|
||||
* you use SetVariable on some of the Params in classify and textord.
|
||||
* If you do, then the effect will be to change it for all your instances.
|
||||
*
|
||||
* Start tesseract. Returns zero on success and -1 on failure.
|
||||
* NOTE that the only members that may be called before Init are those
|
||||
* listed above here in the class definition.
|
||||
*
|
||||
* The datapath must be the name of the tessdata directory.
|
||||
* The language is (usually) an ISO 639-3 string or nullptr will default to
|
||||
* eng. It is entirely safe (and eventually will be efficient too) to call
|
||||
* Init multiple times on the same instance to change language, or just
|
||||
* to reset the classifier.
|
||||
* The language may be a string of the form [~]<lang>[+[~]<lang>]* indicating
|
||||
* that multiple languages are to be loaded. Eg hin+eng will load Hindi and
|
||||
* English. Languages may specify internally that they want to be loaded
|
||||
* with one or more other languages, so the ~ sign is available to override
|
||||
* that. Eg if hin were set to load eng by default, then hin+~eng would force
|
||||
* loading only hin. The number of loaded languages is limited only by
|
||||
* memory, with the caveat that loading additional languages will impact
|
||||
* both speed and accuracy, as there is more work to do to decide on the
|
||||
* applicable language, and there is more chance of hallucinating incorrect
|
||||
* words.
|
||||
* WARNING: On changing languages, all Tesseract parameters are reset
|
||||
* back to their default values. (Which may vary between languages.)
|
||||
* If you have a rare need to set a Variable that controls
|
||||
* initialization for a second call to Init you should explicitly
|
||||
* call End() and then use SetVariable before Init. This is only a very
|
||||
* rare use case, since there are very few uses that require any parameters
|
||||
* to be set before Init.
|
||||
*
|
||||
* If set_only_non_debug_params is true, only params that do not contain
|
||||
* "debug" in the name will be set.
|
||||
*/
|
||||
int Init(const char *datapath, const char *language, OcrEngineMode mode,
|
||||
char **configs, int configs_size,
|
||||
const std::vector<std::string> *vars_vec,
|
||||
const std::vector<std::string> *vars_values,
|
||||
bool set_only_non_debug_params);
|
||||
int Init(const char *datapath, const char *language, OcrEngineMode oem) {
|
||||
return Init(datapath, language, oem, nullptr, 0, nullptr, nullptr, false);
|
||||
}
|
||||
int Init(const char *datapath, const char *language) {
|
||||
return Init(datapath, language, OEM_DEFAULT, nullptr, 0, nullptr, nullptr,
|
||||
false);
|
||||
}
|
||||
// In-memory version reads the traineddata file directly from the given
|
||||
// data[data_size] array, and/or reads data via a FileReader.
|
||||
int Init(const char *data, int data_size, const char *language,
|
||||
OcrEngineMode mode, char **configs, int configs_size,
|
||||
const std::vector<std::string> *vars_vec,
|
||||
const std::vector<std::string> *vars_values,
|
||||
bool set_only_non_debug_params, FileReader reader);
|
||||
|
||||
/**
|
||||
* Returns the languages string used in the last valid initialization.
|
||||
* If the last initialization specified "deu+hin" then that will be
|
||||
* returned. If hin loaded eng automatically as well, then that will
|
||||
* not be included in this list. To find the languages actually
|
||||
* loaded use GetLoadedLanguagesAsVector.
|
||||
* The returned string should NOT be deleted.
|
||||
*/
|
||||
const char *GetInitLanguagesAsString() const;
|
||||
|
||||
/**
|
||||
* Returns the loaded languages in the vector of std::string.
|
||||
* Includes all languages loaded by the last Init, including those loaded
|
||||
* as dependencies of other loaded languages.
|
||||
*/
|
||||
void GetLoadedLanguagesAsVector(std::vector<std::string> *langs) const;
|
||||
|
||||
/**
|
||||
* Returns the available languages in the sorted vector of std::string.
|
||||
*/
|
||||
void GetAvailableLanguagesAsVector(std::vector<std::string> *langs) const;
|
||||
|
||||
/**
|
||||
* Init only for page layout analysis. Use only for calls to SetImage and
|
||||
* AnalysePage. Calls that attempt recognition will generate an error.
|
||||
*/
|
||||
void InitForAnalysePage();
|
||||
|
||||
/**
|
||||
* Read a "config" file containing a set of param, value pairs.
|
||||
* Searches the standard places: tessdata/configs, tessdata/tessconfigs
|
||||
* and also accepts a relative or absolute path name.
|
||||
* Note: only non-init params will be set (init params are set by Init()).
|
||||
*/
|
||||
void ReadConfigFile(const char *filename);
|
||||
/** Same as above, but only set debug params from the given config file. */
|
||||
void ReadDebugConfigFile(const char *filename);
|
||||
|
||||
/**
|
||||
* Set the current page segmentation mode. Defaults to PSM_SINGLE_BLOCK.
|
||||
* The mode is stored as an IntParam so it can also be modified by
|
||||
* ReadConfigFile or SetVariable("tessedit_pageseg_mode", mode as string).
|
||||
*/
|
||||
void SetPageSegMode(PageSegMode mode);
|
||||
|
||||
/** Return the current page segmentation mode. */
|
||||
PageSegMode GetPageSegMode() const;
|
||||
|
||||
/**
|
||||
* Recognize a rectangle from an image and return the result as a string.
|
||||
* May be called many times for a single Init.
|
||||
* Currently has no error checking.
|
||||
* Greyscale of 8 and color of 24 or 32 bits per pixel may be given.
|
||||
* Palette color images will not work properly and must be converted to
|
||||
* 24 bit.
|
||||
* Binary images of 1 bit per pixel may also be given but they must be
|
||||
* byte packed with the MSB of the first byte being the first pixel, and a
|
||||
* 1 represents WHITE. For binary images set bytes_per_pixel=0.
|
||||
* The recognized text is returned as a char* which is coded
|
||||
* as UTF8 and must be freed with the delete [] operator.
|
||||
*
|
||||
* Note that TesseractRect is the simplified convenience interface.
|
||||
* For advanced uses, use SetImage, (optionally) SetRectangle, Recognize,
|
||||
* and one or more of the Get*Text functions below.
|
||||
*/
|
||||
char *TesseractRect(const unsigned char *imagedata, int bytes_per_pixel,
|
||||
int bytes_per_line, int left, int top, int width,
|
||||
int height);
|
||||
|
||||
/**
|
||||
* Call between pages or documents etc to free up memory and forget
|
||||
* adaptive data.
|
||||
*/
|
||||
void ClearAdaptiveClassifier();
|
||||
|
||||
/**
|
||||
* @defgroup AdvancedAPI Advanced API
|
||||
* The following methods break TesseractRect into pieces, so you can
|
||||
* get hold of the thresholded image, get the text in different formats,
|
||||
* get bounding boxes, confidences etc.
|
||||
*/
|
||||
/* @{ */
|
||||
|
||||
/**
|
||||
* Provide an image for Tesseract to recognize. Format is as
|
||||
* TesseractRect above. Copies the image buffer and converts to Pix.
|
||||
* SetImage clears all recognition results, and sets the rectangle to the
|
||||
* full image, so it may be followed immediately by a GetUTF8Text, and it
|
||||
* will automatically perform recognition.
|
||||
*/
|
||||
void SetImage(const unsigned char *imagedata, int width, int height,
|
||||
int bytes_per_pixel, int bytes_per_line);
|
||||
|
||||
/**
|
||||
* Provide an image for Tesseract to recognize. As with SetImage above,
|
||||
* Tesseract takes its own copy of the image, so it need not persist until
|
||||
* after Recognize.
|
||||
* Pix vs raw, which to use?
|
||||
* Use Pix where possible. Tesseract uses Pix as its internal representation
|
||||
* and it is therefore more efficient to provide a Pix directly.
|
||||
*/
|
||||
void SetImage(Pix *pix);
|
||||
|
||||
/**
|
||||
* Set the resolution of the source image in pixels per inch so font size
|
||||
* information can be calculated in results. Call this after SetImage().
|
||||
*/
|
||||
void SetSourceResolution(int ppi);
|
||||
|
||||
/**
|
||||
* Restrict recognition to a sub-rectangle of the image. Call after SetImage.
|
||||
* Each SetRectangle clears the recognition results so multiple rectangles
|
||||
* can be recognized with the same image.
|
||||
*/
|
||||
void SetRectangle(int left, int top, int width, int height);
|
||||
|
||||
/**
|
||||
* Get a copy of the internal thresholded image from Tesseract.
|
||||
* Caller takes ownership of the Pix and must pixDestroy it.
|
||||
* May be called any time after SetImage, or after TesseractRect.
|
||||
*/
|
||||
Pix *GetThresholdedImage();
|
||||
|
||||
/**
|
||||
* Get the result of page layout analysis as a leptonica-style
|
||||
* Boxa, Pixa pair, in reading order.
|
||||
* Can be called before or after Recognize.
|
||||
*/
|
||||
Boxa *GetRegions(Pixa **pixa);
|
||||
|
||||
/**
|
||||
* Get the textlines as a leptonica-style
|
||||
* Boxa, Pixa pair, in reading order.
|
||||
* Can be called before or after Recognize.
|
||||
* If raw_image is true, then extract from the original image instead of the
|
||||
* thresholded image and pad by raw_padding pixels.
|
||||
* If blockids is not nullptr, the block-id of each line is also returned as
|
||||
* an array of one element per line. delete [] after use. If paraids is not
|
||||
* nullptr, the paragraph-id of each line within its block is also returned as
|
||||
* an array of one element per line. delete [] after use.
|
||||
*/
|
||||
Boxa *GetTextlines(bool raw_image, int raw_padding, Pixa **pixa,
|
||||
int **blockids, int **paraids);
|
||||
/*
|
||||
Helper method to extract from the thresholded image. (most common usage)
|
||||
*/
|
||||
Boxa *GetTextlines(Pixa **pixa, int **blockids) {
|
||||
return GetTextlines(false, 0, pixa, blockids, nullptr);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get textlines and strips of image regions as a leptonica-style Boxa, Pixa
|
||||
* pair, in reading order. Enables downstream handling of non-rectangular
|
||||
* regions.
|
||||
* Can be called before or after Recognize.
|
||||
* If blockids is not nullptr, the block-id of each line is also returned as
|
||||
* an array of one element per line. delete [] after use.
|
||||
*/
|
||||
Boxa *GetStrips(Pixa **pixa, int **blockids);
|
||||
|
||||
/**
|
||||
* Get the words as a leptonica-style
|
||||
* Boxa, Pixa pair, in reading order.
|
||||
* Can be called before or after Recognize.
|
||||
*/
|
||||
Boxa *GetWords(Pixa **pixa);
|
||||
|
||||
/**
|
||||
* Gets the individual connected (text) components (created
|
||||
* after pages segmentation step, but before recognition)
|
||||
* as a leptonica-style Boxa, Pixa pair, in reading order.
|
||||
* Can be called before or after Recognize.
|
||||
* Note: the caller is responsible for calling boxaDestroy()
|
||||
* on the returned Boxa array and pixaDestroy() on cc array.
|
||||
*/
|
||||
Boxa *GetConnectedComponents(Pixa **cc);
|
||||
|
||||
/**
|
||||
* Get the given level kind of components (block, textline, word etc.) as a
|
||||
* leptonica-style Boxa, Pixa pair, in reading order.
|
||||
* Can be called before or after Recognize.
|
||||
* If blockids is not nullptr, the block-id of each component is also returned
|
||||
* as an array of one element per component. delete [] after use.
|
||||
* If paraids is not nullptr, the paragraph-id of each component with its
|
||||
* block is also returned as an array of one element per component. delete []
|
||||
* after use. If raw_image is true, then portions of the original image are
|
||||
* extracted instead of the thresholded image and padded with raw_padding. If
|
||||
* text_only is true, then only text components are returned.
|
||||
*/
|
||||
Boxa *GetComponentImages(PageIteratorLevel level, bool text_only,
|
||||
bool raw_image, int raw_padding, Pixa **pixa,
|
||||
int **blockids, int **paraids);
|
||||
// Helper function to get binary images with no padding (most common usage).
|
||||
Boxa *GetComponentImages(const PageIteratorLevel level, const bool text_only,
|
||||
Pixa **pixa, int **blockids) {
|
||||
return GetComponentImages(level, text_only, false, 0, pixa, blockids,
|
||||
nullptr);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the scale factor of the thresholded image that would be returned by
|
||||
* GetThresholdedImage() and the various GetX() methods that call
|
||||
* GetComponentImages().
|
||||
* Returns 0 if no thresholder has been set.
|
||||
*/
|
||||
int GetThresholdedImageScaleFactor() const;
|
||||
|
||||
/**
|
||||
* Runs page layout analysis in the mode set by SetPageSegMode.
|
||||
* May optionally be called prior to Recognize to get access to just
|
||||
* the page layout results. Returns an iterator to the results.
|
||||
* If merge_similar_words is true, words are combined where suitable for use
|
||||
* with a line recognizer. Use if you want to use AnalyseLayout to find the
|
||||
* textlines, and then want to process textline fragments with an external
|
||||
* line recognizer.
|
||||
* Returns nullptr on error or an empty page.
|
||||
* The returned iterator must be deleted after use.
|
||||
* WARNING! This class points to data held within the TessBaseAPI class, and
|
||||
* therefore can only be used while the TessBaseAPI class still exists and
|
||||
* has not been subjected to a call of Init, SetImage, Recognize, Clear, End
|
||||
* DetectOS, or anything else that changes the internal PAGE_RES.
|
||||
*/
|
||||
PageIterator *AnalyseLayout();
|
||||
PageIterator *AnalyseLayout(bool merge_similar_words);
|
||||
|
||||
/**
|
||||
* Recognize the image from SetAndThresholdImage, generating Tesseract
|
||||
* internal structures. Returns 0 on success.
|
||||
* Optional. The Get*Text functions below will call Recognize if needed.
|
||||
* After Recognize, the output is kept internally until the next SetImage.
|
||||
*/
|
||||
int Recognize(ETEXT_DESC *monitor);
|
||||
|
||||
/**
|
||||
* Methods to retrieve information after SetAndThresholdImage(),
|
||||
* Recognize() or TesseractRect(). (Recognize is called implicitly if needed.)
|
||||
*/
|
||||
|
||||
/**
|
||||
* Turns images into symbolic text.
|
||||
*
|
||||
* filename can point to a single image, a multi-page TIFF,
|
||||
* or a plain text list of image filenames.
|
||||
*
|
||||
* retry_config is useful for debugging. If not nullptr, you can fall
|
||||
* back to an alternate configuration if a page fails for some
|
||||
* reason.
|
||||
*
|
||||
* timeout_millisec terminates processing if any single page
|
||||
* takes too long. Set to 0 for unlimited time.
|
||||
*
|
||||
* renderer is responsible for creating the output. For example,
|
||||
* use the TessTextRenderer if you want plaintext output, or
|
||||
* the TessPDFRenderer to produce searchable PDF.
|
||||
*
|
||||
* If tessedit_page_number is non-negative, will only process that
|
||||
* single page. Works for multi-page tiff file, or filelist.
|
||||
*
|
||||
* Returns true if successful, false on error.
|
||||
*/
|
||||
bool ProcessPages(const char *filename, const char *retry_config,
|
||||
int timeout_millisec, TessResultRenderer *renderer);
|
||||
// Does the real work of ProcessPages.
|
||||
bool ProcessPagesInternal(const char *filename, const char *retry_config,
|
||||
int timeout_millisec, TessResultRenderer *renderer);
|
||||
|
||||
/**
|
||||
* Turn a single image into symbolic text.
|
||||
*
|
||||
* The pix is the image processed. filename and page_index are
|
||||
* metadata used by side-effect processes, such as reading a box
|
||||
* file or formatting as hOCR.
|
||||
*
|
||||
* See ProcessPages for descriptions of other parameters.
|
||||
*/
|
||||
bool ProcessPage(Pix *pix, int page_index, const char *filename,
|
||||
const char *retry_config, int timeout_millisec,
|
||||
TessResultRenderer *renderer);
|
||||
|
||||
/**
|
||||
* Get a reading-order iterator to the results of LayoutAnalysis and/or
|
||||
* Recognize. The returned iterator must be deleted after use.
|
||||
* WARNING! This class points to data held within the TessBaseAPI class, and
|
||||
* therefore can only be used while the TessBaseAPI class still exists and
|
||||
* has not been subjected to a call of Init, SetImage, Recognize, Clear, End
|
||||
* DetectOS, or anything else that changes the internal PAGE_RES.
|
||||
*/
|
||||
ResultIterator *GetIterator();
|
||||
|
||||
/**
|
||||
* Get a mutable iterator to the results of LayoutAnalysis and/or Recognize.
|
||||
* The returned iterator must be deleted after use.
|
||||
* WARNING! This class points to data held within the TessBaseAPI class, and
|
||||
* therefore can only be used while the TessBaseAPI class still exists and
|
||||
* has not been subjected to a call of Init, SetImage, Recognize, Clear, End
|
||||
* DetectOS, or anything else that changes the internal PAGE_RES.
|
||||
*/
|
||||
MutableIterator *GetMutableIterator();
|
||||
|
||||
/**
|
||||
* The recognized text is returned as a char* which is coded
|
||||
* as UTF8 and must be freed with the delete [] operator.
|
||||
*/
|
||||
char *GetUTF8Text();
|
||||
|
||||
/**
|
||||
* Make a HTML-formatted string with hOCR markup from the internal
|
||||
* data structures.
|
||||
* page_number is 0-based but will appear in the output as 1-based.
|
||||
* monitor can be used to
|
||||
* cancel the recognition
|
||||
* receive progress callbacks
|
||||
* Returned string must be freed with the delete [] operator.
|
||||
*/
|
||||
char *GetHOCRText(ETEXT_DESC *monitor, int page_number);
|
||||
|
||||
/**
|
||||
* Make a HTML-formatted string with hOCR markup from the internal
|
||||
* data structures.
|
||||
* page_number is 0-based but will appear in the output as 1-based.
|
||||
* Returned string must be freed with the delete [] operator.
|
||||
*/
|
||||
char *GetHOCRText(int page_number);
|
||||
|
||||
/**
|
||||
* Make an XML-formatted string with Alto markup from the internal
|
||||
* data structures.
|
||||
*/
|
||||
char *GetAltoText(ETEXT_DESC *monitor, int page_number);
|
||||
|
||||
/**
|
||||
* Make an XML-formatted string with Alto markup from the internal
|
||||
* data structures.
|
||||
*/
|
||||
char *GetAltoText(int page_number);
|
||||
|
||||
/**
|
||||
* Make a TSV-formatted string from the internal data structures.
|
||||
* page_number is 0-based but will appear in the output as 1-based.
|
||||
* Returned string must be freed with the delete [] operator.
|
||||
*/
|
||||
char *GetTSVText(int page_number);
|
||||
|
||||
/**
|
||||
* Make a box file for LSTM training from the internal data structures.
|
||||
* Constructs coordinates in the original image - not just the rectangle.
|
||||
* page_number is a 0-based page index that will appear in the box file.
|
||||
* Returned string must be freed with the delete [] operator.
|
||||
*/
|
||||
char *GetLSTMBoxText(int page_number);
|
||||
|
||||
/**
|
||||
* The recognized text is returned as a char* which is coded in the same
|
||||
* format as a box file used in training.
|
||||
* Constructs coordinates in the original image - not just the rectangle.
|
||||
* page_number is a 0-based page index that will appear in the box file.
|
||||
* Returned string must be freed with the delete [] operator.
|
||||
*/
|
||||
char *GetBoxText(int page_number);
|
||||
|
||||
/**
|
||||
* The recognized text is returned as a char* which is coded in the same
|
||||
* format as a WordStr box file used in training.
|
||||
* page_number is a 0-based page index that will appear in the box file.
|
||||
* Returned string must be freed with the delete [] operator.
|
||||
*/
|
||||
char *GetWordStrBoxText(int page_number);
|
||||
|
||||
/**
|
||||
* The recognized text is returned as a char* which is coded
|
||||
* as UNLV format Latin-1 with specific reject and suspect codes.
|
||||
* Returned string must be freed with the delete [] operator.
|
||||
*/
|
||||
char *GetUNLVText();
|
||||
|
||||
/**
|
||||
* Detect the orientation of the input image and apparent script (alphabet).
|
||||
* orient_deg is the detected clockwise rotation of the input image in degrees
|
||||
* (0, 90, 180, 270)
|
||||
* orient_conf is the confidence (15.0 is reasonably confident)
|
||||
* script_name is an ASCII string, the name of the script, e.g. "Latin"
|
||||
* script_conf is confidence level in the script
|
||||
* Returns true on success and writes values to each parameter as an output
|
||||
*/
|
||||
bool DetectOrientationScript(int *orient_deg, float *orient_conf,
|
||||
const char **script_name, float *script_conf);
|
||||
|
||||
/**
|
||||
* The recognized text is returned as a char* which is coded
|
||||
* as UTF8 and must be freed with the delete [] operator.
|
||||
* page_number is a 0-based page index that will appear in the osd file.
|
||||
*/
|
||||
char *GetOsdText(int page_number);
|
||||
|
||||
/** Returns the (average) confidence value between 0 and 100. */
|
||||
int MeanTextConf();
|
||||
/**
|
||||
* Returns all word confidences (between 0 and 100) in an array, terminated
|
||||
* by -1. The calling function must delete [] after use.
|
||||
* The number of confidences should correspond to the number of space-
|
||||
* delimited words in GetUTF8Text.
|
||||
*/
|
||||
int *AllWordConfidences();
|
||||
|
||||
#ifndef DISABLED_LEGACY_ENGINE
|
||||
/**
|
||||
* Applies the given word to the adaptive classifier if possible.
|
||||
* The word must be SPACE-DELIMITED UTF-8 - l i k e t h i s , so it can
|
||||
* tell the boundaries of the graphemes.
|
||||
* Assumes that SetImage/SetRectangle have been used to set the image
|
||||
* to the given word. The mode arg should be PSM_SINGLE_WORD or
|
||||
* PSM_CIRCLE_WORD, as that will be used to control layout analysis.
|
||||
* The currently set PageSegMode is preserved.
|
||||
* Returns false if adaption was not possible for some reason.
|
||||
*/
|
||||
bool AdaptToWordStr(PageSegMode mode, const char *wordstr);
|
||||
#endif // ndef DISABLED_LEGACY_ENGINE
|
||||
|
||||
/**
|
||||
* Free up recognition results and any stored image data, without actually
|
||||
* freeing any recognition data that would be time-consuming to reload.
|
||||
* Afterwards, you must call SetImage or TesseractRect before doing
|
||||
* any Recognize or Get* operation.
|
||||
*/
|
||||
void Clear();
|
||||
|
||||
/**
|
||||
* Close down tesseract and free up all memory. End() is equivalent to
|
||||
* destructing and reconstructing your TessBaseAPI.
|
||||
* Once End() has been used, none of the other API functions may be used
|
||||
* other than Init and anything declared above it in the class definition.
|
||||
*/
|
||||
void End();
|
||||
|
||||
/**
|
||||
* Clear any library-level memory caches.
|
||||
* There are a variety of expensive-to-load constant data structures (mostly
|
||||
* language dictionaries) that are cached globally -- surviving the Init()
|
||||
* and End() of individual TessBaseAPI's. This function allows the clearing
|
||||
* of these caches.
|
||||
**/
|
||||
static void ClearPersistentCache();
|
||||
|
||||
/**
|
||||
* Check whether a word is valid according to Tesseract's language model
|
||||
* @return 0 if the word is invalid, non-zero if valid.
|
||||
* @warning temporary! This function will be removed from here and placed
|
||||
* in a separate API at some future time.
|
||||
*/
|
||||
int IsValidWord(const char *word) const;
|
||||
// Returns true if utf8_character is defined in the UniCharset.
|
||||
bool IsValidCharacter(const char *utf8_character) const;
|
||||
|
||||
bool GetTextDirection(int *out_offset, float *out_slope);
|
||||
|
||||
/** Sets Dict::letter_is_okay_ function to point to the given function. */
|
||||
void SetDictFunc(DictFunc f);
|
||||
|
||||
/** Sets Dict::probability_in_context_ function to point to the given
|
||||
* function.
|
||||
*/
|
||||
void SetProbabilityInContextFunc(ProbabilityInContextFunc f);
|
||||
|
||||
/**
|
||||
* Estimates the Orientation And Script of the image.
|
||||
* @return true if the image was processed successfully.
|
||||
*/
|
||||
bool DetectOS(OSResults *);
|
||||
|
||||
/**
|
||||
* Return text orientation of each block as determined by an earlier run
|
||||
* of layout analysis.
|
||||
*/
|
||||
void GetBlockTextOrientations(int **block_orientation,
|
||||
bool **vertical_writing);
|
||||
|
||||
/** This method returns the string form of the specified unichar. */
|
||||
const char *GetUnichar(int unichar_id) const;
|
||||
|
||||
/** Return the pointer to the i-th dawg loaded into tesseract_ object. */
|
||||
const Dawg *GetDawg(int i) const;
|
||||
|
||||
/** Return the number of dawgs loaded into tesseract_ object. */
|
||||
int NumDawgs() const;
|
||||
|
||||
/** Accessor for the tesseract_ member: hands back the stored pointer as-is. */
Tesseract *tesseract() const {
|
||||
return tesseract_;
|
||||
}
|
||||
|
||||
/** Accessor for last_oem_requested_, returned by value. */
OcrEngineMode oem() const {
|
||||
return last_oem_requested_;
|
||||
}
|
||||
|
||||
void set_min_orientation_margin(double margin);
|
||||
/* @} */
|
||||
|
||||
protected:
|
||||
/** Common code for setting the image. Returns true if Init has been called.
|
||||
*/
|
||||
bool InternalSetImage();
|
||||
|
||||
/**
|
||||
* Run the thresholder to make the thresholded image. If pix is not nullptr,
|
||||
* the source is thresholded to pix instead of the internal IMAGE.
|
||||
*/
|
||||
virtual bool Threshold(Pix **pix);
|
||||
|
||||
/**
|
||||
* Find lines from the image making the BLOCK_LIST.
|
||||
* @return 0 on success.
|
||||
*/
|
||||
int FindLines();
|
||||
|
||||
/** Delete the pageres and block list ready for a new page. */
|
||||
void ClearResults();
|
||||
|
||||
/**
|
||||
* Return an LTR Result Iterator -- used only for training, as we really want
|
||||
* to ignore all BiDi smarts at that point.
|
||||
* delete once you're done with it.
|
||||
*/
|
||||
LTRResultIterator *GetLTRIterator();
|
||||
|
||||
/**
|
||||
* Return the length of the output text string, as UTF8, assuming
|
||||
* one newline per line and one per block, with a terminator,
|
||||
* and assuming a single character reject marker for each rejected character.
|
||||
* Also return the number of recognized blobs in blob_count.
|
||||
*/
|
||||
int TextLength(int *blob_count) const;
|
||||
|
||||
//// paragraphs.cpp ////////////////////////////////////////////////////
|
||||
void DetectParagraphs(bool after_text_recognition);
|
||||
|
||||
/** Read-only accessor for the page_res_ member; returned as pointer-to-const. */
const PAGE_RES *GetPageRes() const {
|
||||
return page_res_;
|
||||
}
|
||||
|
||||
protected:
|
||||
Tesseract *tesseract_; ///< The underlying data object.
|
||||
Tesseract *osd_tesseract_; ///< For orientation & script detection.
|
||||
EquationDetect *equ_detect_; ///< The equation detector.
|
||||
FileReader reader_; ///< Reads files from any filesystem.
|
||||
ImageThresholder *thresholder_; ///< Image thresholding module.
|
||||
std::vector<ParagraphModel *> *paragraph_models_;
|
||||
BLOCK_LIST *block_list_; ///< The page layout.
|
||||
PAGE_RES *page_res_; ///< The page-level data.
|
||||
std::string input_file_; ///< Name used by training code.
|
||||
std::string output_file_; ///< Name used by debug code.
|
||||
std::string datapath_; ///< Current location of tessdata.
|
||||
std::string language_; ///< Last initialized language.
|
||||
OcrEngineMode last_oem_requested_; ///< Last ocr language mode requested.
|
||||
bool recognition_done_; ///< page_res_ contains recognition data.
|
||||
|
||||
/**
|
||||
* @defgroup ThresholderParams Thresholder Parameters
|
||||
* Parameters saved from the Thresholder. Needed to rebuild coordinates.
|
||||
*/
|
||||
/* @{ */
|
||||
int rect_left_;
|
||||
int rect_top_;
|
||||
int rect_width_;
|
||||
int rect_height_;
|
||||
int image_width_;
|
||||
int image_height_;
|
||||
/* @} */
|
||||
|
||||
private:
|
||||
// A list of image filenames gets special consideration
|
||||
bool ProcessPagesFileList(FILE *fp, std::string *buf,
|
||||
const char *retry_config, int timeout_millisec,
|
||||
TessResultRenderer *renderer,
|
||||
int tessedit_page_number);
|
||||
// TIFF supports multipage so gets special consideration.
|
||||
bool ProcessPagesMultipageTiff(const unsigned char *data, size_t size,
|
||||
const char *filename, const char *retry_config,
|
||||
int timeout_millisec,
|
||||
TessResultRenderer *renderer,
|
||||
int tessedit_page_number);
|
||||
}; // class TessBaseAPI.
|
||||
|
||||
/** Escape a char string - replace &<>"' with HTML codes. */
|
||||
std::string HOcrEscape(const char *text);
|
||||
|
||||
} // namespace tesseract
|
||||
|
||||
#endif // TESSERACT_API_BASEAPI_H_
|
|
@ -1,484 +0,0 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: capi.h
|
||||
// Description: C-API TessBaseAPI
|
||||
//
|
||||
// (C) Copyright 2012, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef API_CAPI_H_
|
||||
#define API_CAPI_H_
|
||||
|
||||
#include "export.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
# include <tesseract/baseapi.h>
|
||||
# include <tesseract/ocrclass.h>
|
||||
# include <tesseract/pageiterator.h>
|
||||
# include <tesseract/renderer.h>
|
||||
# include <tesseract/resultiterator.h>
|
||||
#endif
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#ifndef BOOL
|
||||
# define BOOL int
|
||||
# define TRUE 1
|
||||
# define FALSE 0
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
typedef tesseract::TessResultRenderer TessResultRenderer;
|
||||
typedef tesseract::TessBaseAPI TessBaseAPI;
|
||||
typedef tesseract::PageIterator TessPageIterator;
|
||||
typedef tesseract::ResultIterator TessResultIterator;
|
||||
typedef tesseract::MutableIterator TessMutableIterator;
|
||||
typedef tesseract::ChoiceIterator TessChoiceIterator;
|
||||
typedef tesseract::OcrEngineMode TessOcrEngineMode;
|
||||
typedef tesseract::PageSegMode TessPageSegMode;
|
||||
typedef tesseract::PageIteratorLevel TessPageIteratorLevel;
|
||||
typedef tesseract::Orientation TessOrientation;
|
||||
typedef tesseract::ParagraphJustification TessParagraphJustification;
|
||||
typedef tesseract::WritingDirection TessWritingDirection;
|
||||
typedef tesseract::TextlineOrder TessTextlineOrder;
|
||||
typedef tesseract::PolyBlockType TessPolyBlockType;
|
||||
typedef tesseract::ETEXT_DESC ETEXT_DESC;
|
||||
#else
|
||||
typedef struct TessResultRenderer TessResultRenderer;
|
||||
typedef struct TessBaseAPI TessBaseAPI;
|
||||
typedef struct TessPageIterator TessPageIterator;
|
||||
typedef struct TessResultIterator TessResultIterator;
|
||||
typedef struct TessMutableIterator TessMutableIterator;
|
||||
typedef struct TessChoiceIterator TessChoiceIterator;
|
||||
/* C-only definition (the #else branch of #ifdef __cplusplus). In C++ builds
 * the same name is a typedef of tesseract::OcrEngineMode instead.
 * NOTE(review): enumerator order presumably has to stay in sync with the C++
 * enum - confirm against the C++ header before reordering. */
typedef enum TessOcrEngineMode {
|
||||
OEM_TESSERACT_ONLY,
|
||||
OEM_LSTM_ONLY,
|
||||
OEM_TESSERACT_LSTM_COMBINED,
|
||||
OEM_DEFAULT
|
||||
} TessOcrEngineMode;
|
||||
/* C-only definition (the #else branch of #ifdef __cplusplus). In C++ builds
 * the same name is a typedef of tesseract::PageSegMode instead.
 * PSM_COUNT is the last enumerator, so with implicit numbering its value is
 * the number of modes listed before it.
 * NOTE(review): enumerator order presumably has to stay in sync with the C++
 * enum - confirm against the C++ header before reordering. */
typedef enum TessPageSegMode {
|
||||
PSM_OSD_ONLY,
|
||||
PSM_AUTO_OSD,
|
||||
PSM_AUTO_ONLY,
|
||||
PSM_AUTO,
|
||||
PSM_SINGLE_COLUMN,
|
||||
PSM_SINGLE_BLOCK_VERT_TEXT,
|
||||
PSM_SINGLE_BLOCK,
|
||||
PSM_SINGLE_LINE,
|
||||
PSM_SINGLE_WORD,
|
||||
PSM_CIRCLE_WORD,
|
||||
PSM_SINGLE_CHAR,
|
||||
PSM_SPARSE_TEXT,
|
||||
PSM_SPARSE_TEXT_OSD,
|
||||
PSM_RAW_LINE,
|
||||
PSM_COUNT
|
||||
} TessPageSegMode;
|
||||
/* C-only definition (the #else branch of #ifdef __cplusplus). In C++ builds
 * the same name is a typedef of tesseract::PageIteratorLevel instead.
 * NOTE(review): enumerator order presumably has to stay in sync with the C++
 * enum - confirm against the C++ header before reordering. */
typedef enum TessPageIteratorLevel {
|
||||
RIL_BLOCK,
|
||||
RIL_PARA,
|
||||
RIL_TEXTLINE,
|
||||
RIL_WORD,
|
||||
RIL_SYMBOL
|
||||
} TessPageIteratorLevel;
|
||||
/* C-only definition (the #else branch of #ifdef __cplusplus). In C++ builds
 * the same name is a typedef of tesseract::PolyBlockType instead.
 * PT_COUNT is the last enumerator, so with implicit numbering its value is
 * the number of block types listed before it.
 * NOTE(review): enumerator order presumably has to stay in sync with the C++
 * enum - confirm against the C++ header before reordering. */
typedef enum TessPolyBlockType {
|
||||
PT_UNKNOWN,
|
||||
PT_FLOWING_TEXT,
|
||||
PT_HEADING_TEXT,
|
||||
PT_PULLOUT_TEXT,
|
||||
PT_EQUATION,
|
||||
PT_INLINE_EQUATION,
|
||||
PT_TABLE,
|
||||
PT_VERTICAL_TEXT,
|
||||
PT_CAPTION_TEXT,
|
||||
PT_FLOWING_IMAGE,
|
||||
PT_HEADING_IMAGE,
|
||||
PT_PULLOUT_IMAGE,
|
||||
PT_HORZ_LINE,
|
||||
PT_VERT_LINE,
|
||||
PT_NOISE,
|
||||
PT_COUNT
|
||||
} TessPolyBlockType;
|
||||
/* C-only definition (the #else branch of #ifdef __cplusplus). In C++ builds
 * the same name is a typedef of tesseract::Orientation instead.
 * NOTE(review): enumerator order presumably has to stay in sync with the C++
 * enum - confirm against the C++ header before reordering. */
typedef enum TessOrientation {
|
||||
ORIENTATION_PAGE_UP,
|
||||
ORIENTATION_PAGE_RIGHT,
|
||||
ORIENTATION_PAGE_DOWN,
|
||||
ORIENTATION_PAGE_LEFT
|
||||
} TessOrientation;
|
||||
/* C-only definition (the #else branch of #ifdef __cplusplus). In C++ builds
 * the same name is a typedef of tesseract::ParagraphJustification instead.
 * NOTE(review): enumerator order presumably has to stay in sync with the C++
 * enum - confirm against the C++ header before reordering. */
typedef enum TessParagraphJustification {
|
||||
JUSTIFICATION_UNKNOWN,
|
||||
JUSTIFICATION_LEFT,
|
||||
JUSTIFICATION_CENTER,
|
||||
JUSTIFICATION_RIGHT
|
||||
} TessParagraphJustification;
|
||||
/* C-only definition (the #else branch of #ifdef __cplusplus). In C++ builds
 * the same name is a typedef of tesseract::WritingDirection instead.
 * NOTE(review): enumerator order presumably has to stay in sync with the C++
 * enum - confirm against the C++ header before reordering. */
typedef enum TessWritingDirection {
|
||||
WRITING_DIRECTION_LEFT_TO_RIGHT,
|
||||
WRITING_DIRECTION_RIGHT_TO_LEFT,
|
||||
WRITING_DIRECTION_TOP_TO_BOTTOM
|
||||
} TessWritingDirection;
|
||||
/* C-only definition (the #else branch of #ifdef __cplusplus). In C++ builds
 * the same name is a typedef of tesseract::TextlineOrder instead.
 * NOTE(review): enumerator order presumably has to stay in sync with the C++
 * enum - confirm against the C++ header before reordering. */
typedef enum TessTextlineOrder {
|
||||
TEXTLINE_ORDER_LEFT_TO_RIGHT,
|
||||
TEXTLINE_ORDER_RIGHT_TO_LEFT,
|
||||
TEXTLINE_ORDER_TOP_TO_BOTTOM
|
||||
} TessTextlineOrder;
|
||||
typedef struct ETEXT_DESC ETEXT_DESC;
|
||||
#endif
|
||||
|
||||
typedef bool (*TessCancelFunc)(void *cancel_this, int words);
|
||||
typedef bool (*TessProgressFunc)(ETEXT_DESC *ths, int left, int right, int top,
|
||||
int bottom);
|
||||
|
||||
struct Pix;
|
||||
struct Boxa;
|
||||
struct Pixa;
|
||||
|
||||
/* General free functions */
|
||||
|
||||
/* Takes no arguments and returns a const char *. Spelled (void) so that C
 * translation units see a real prototype rather than an unspecified
 * parameter list; the meaning in C++ is unchanged. */
TESS_API const char *TessVersion(void);
|
||||
TESS_API void TessDeleteText(const char *text);
|
||||
TESS_API void TessDeleteTextArray(char **arr);
|
||||
TESS_API void TessDeleteIntArray(const int *arr);
|
||||
|
||||
/* Renderer API */
|
||||
TESS_API TessResultRenderer *TessTextRendererCreate(const char *outputbase);
|
||||
TESS_API TessResultRenderer *TessHOcrRendererCreate(const char *outputbase);
|
||||
TESS_API TessResultRenderer *TessHOcrRendererCreate2(const char *outputbase,
|
||||
BOOL font_info);
|
||||
TESS_API TessResultRenderer *TessAltoRendererCreate(const char *outputbase);
|
||||
TESS_API TessResultRenderer *TessTsvRendererCreate(const char *outputbase);
|
||||
TESS_API TessResultRenderer *TessPDFRendererCreate(const char *outputbase,
|
||||
const char *datadir,
|
||||
BOOL textonly);
|
||||
TESS_API TessResultRenderer *TessUnlvRendererCreate(const char *outputbase);
|
||||
TESS_API TessResultRenderer *TessBoxTextRendererCreate(const char *outputbase);
|
||||
TESS_API TessResultRenderer *TessLSTMBoxRendererCreate(const char *outputbase);
|
||||
TESS_API TessResultRenderer *TessWordStrBoxRendererCreate(
|
||||
const char *outputbase);
|
||||
|
||||
TESS_API void TessDeleteResultRenderer(TessResultRenderer *renderer);
|
||||
TESS_API void TessResultRendererInsert(TessResultRenderer *renderer,
|
||||
TessResultRenderer *next);
|
||||
TESS_API TessResultRenderer *TessResultRendererNext(
|
||||
TessResultRenderer *renderer);
|
||||
TESS_API BOOL TessResultRendererBeginDocument(TessResultRenderer *renderer,
|
||||
const char *title);
|
||||
TESS_API BOOL TessResultRendererAddImage(TessResultRenderer *renderer,
|
||||
TessBaseAPI *api);
|
||||
TESS_API BOOL TessResultRendererEndDocument(TessResultRenderer *renderer);
|
||||
|
||||
TESS_API const char *TessResultRendererExtention(TessResultRenderer *renderer);
|
||||
TESS_API const char *TessResultRendererTitle(TessResultRenderer *renderer);
|
||||
TESS_API int TessResultRendererImageNum(TessResultRenderer *renderer);
|
||||
|
||||
/* Base API */
|
||||
|
||||
/* Takes no arguments and returns a TessBaseAPI *. Spelled (void) so that C
 * translation units see a real prototype rather than an unspecified
 * parameter list; the meaning in C++ is unchanged. */
TESS_API TessBaseAPI *TessBaseAPICreate(void);
|
||||
TESS_API void TessBaseAPIDelete(TessBaseAPI *handle);
|
||||
|
||||
TESS_API size_t TessBaseAPIGetOpenCLDevice(TessBaseAPI *handle, void **device);
|
||||
|
||||
TESS_API void TessBaseAPISetInputName(TessBaseAPI *handle, const char *name);
|
||||
TESS_API const char *TessBaseAPIGetInputName(TessBaseAPI *handle);
|
||||
|
||||
TESS_API void TessBaseAPISetInputImage(TessBaseAPI *handle, struct Pix *pix);
|
||||
TESS_API struct Pix *TessBaseAPIGetInputImage(TessBaseAPI *handle);
|
||||
|
||||
TESS_API int TessBaseAPIGetSourceYResolution(TessBaseAPI *handle);
|
||||
TESS_API const char *TessBaseAPIGetDatapath(TessBaseAPI *handle);
|
||||
|
||||
TESS_API void TessBaseAPISetOutputName(TessBaseAPI *handle, const char *name);
|
||||
|
||||
TESS_API BOOL TessBaseAPISetVariable(TessBaseAPI *handle, const char *name,
|
||||
const char *value);
|
||||
TESS_API BOOL TessBaseAPISetDebugVariable(TessBaseAPI *handle, const char *name,
|
||||
const char *value);
|
||||
|
||||
TESS_API BOOL TessBaseAPIGetIntVariable(const TessBaseAPI *handle,
|
||||
const char *name, int *value);
|
||||
TESS_API BOOL TessBaseAPIGetBoolVariable(const TessBaseAPI *handle,
|
||||
const char *name, BOOL *value);
|
||||
TESS_API BOOL TessBaseAPIGetDoubleVariable(const TessBaseAPI *handle,
|
||||
const char *name, double *value);
|
||||
TESS_API const char *TessBaseAPIGetStringVariable(const TessBaseAPI *handle,
|
||||
const char *name);
|
||||
|
||||
TESS_API void TessBaseAPIPrintVariables(const TessBaseAPI *handle, FILE *fp);
|
||||
TESS_API BOOL TessBaseAPIPrintVariablesToFile(const TessBaseAPI *handle,
|
||||
const char *filename);
|
||||
|
||||
TESS_API int TessBaseAPIInit1(TessBaseAPI *handle, const char *datapath,
|
||||
const char *language, TessOcrEngineMode oem,
|
||||
char **configs, int configs_size);
|
||||
TESS_API int TessBaseAPIInit2(TessBaseAPI *handle, const char *datapath,
|
||||
const char *language, TessOcrEngineMode oem);
|
||||
TESS_API int TessBaseAPIInit3(TessBaseAPI *handle, const char *datapath,
|
||||
const char *language);
|
||||
|
||||
TESS_API int TessBaseAPIInit4(TessBaseAPI *handle, const char *datapath,
|
||||
const char *language, TessOcrEngineMode mode,
|
||||
char **configs, int configs_size, char **vars_vec,
|
||||
char **vars_values, size_t vars_vec_size,
|
||||
BOOL set_only_non_debug_params);
|
||||
|
||||
TESS_API int TessBaseAPIInit5(TessBaseAPI *handle, const char *data, int data_size,
|
||||
const char *language, TessOcrEngineMode mode,
|
||||
char **configs, int configs_size, char **vars_vec,
|
||||
char **vars_values, size_t vars_vec_size,
|
||||
BOOL set_only_non_debug_params);
|
||||
|
||||
TESS_API const char *TessBaseAPIGetInitLanguagesAsString(
|
||||
const TessBaseAPI *handle);
|
||||
TESS_API char **TessBaseAPIGetLoadedLanguagesAsVector(
|
||||
const TessBaseAPI *handle);
|
||||
TESS_API char **TessBaseAPIGetAvailableLanguagesAsVector(
|
||||
const TessBaseAPI *handle);
|
||||
|
||||
TESS_API void TessBaseAPIInitForAnalysePage(TessBaseAPI *handle);
|
||||
|
||||
TESS_API void TessBaseAPIReadConfigFile(TessBaseAPI *handle,
|
||||
const char *filename);
|
||||
TESS_API void TessBaseAPIReadDebugConfigFile(TessBaseAPI *handle,
|
||||
const char *filename);
|
||||
|
||||
TESS_API void TessBaseAPISetPageSegMode(TessBaseAPI *handle,
|
||||
TessPageSegMode mode);
|
||||
TESS_API TessPageSegMode TessBaseAPIGetPageSegMode(const TessBaseAPI *handle);
|
||||
|
||||
TESS_API char *TessBaseAPIRect(TessBaseAPI *handle,
|
||||
const unsigned char *imagedata,
|
||||
int bytes_per_pixel, int bytes_per_line,
|
||||
int left, int top, int width, int height);
|
||||
|
||||
TESS_API void TessBaseAPIClearAdaptiveClassifier(TessBaseAPI *handle);
|
||||
|
||||
TESS_API void TessBaseAPISetImage(TessBaseAPI *handle,
|
||||
const unsigned char *imagedata, int width,
|
||||
int height, int bytes_per_pixel,
|
||||
int bytes_per_line);
|
||||
TESS_API void TessBaseAPISetImage2(TessBaseAPI *handle, struct Pix *pix);
|
||||
|
||||
TESS_API void TessBaseAPISetSourceResolution(TessBaseAPI *handle, int ppi);
|
||||
|
||||
TESS_API void TessBaseAPISetRectangle(TessBaseAPI *handle, int left, int top,
|
||||
int width, int height);
|
||||
|
||||
TESS_API struct Pix *TessBaseAPIGetThresholdedImage(TessBaseAPI *handle);
|
||||
TESS_API struct Boxa *TessBaseAPIGetRegions(TessBaseAPI *handle,
|
||||
struct Pixa **pixa);
|
||||
TESS_API struct Boxa *TessBaseAPIGetTextlines(TessBaseAPI *handle,
|
||||
struct Pixa **pixa,
|
||||
int **blockids);
|
||||
TESS_API struct Boxa *TessBaseAPIGetTextlines1(TessBaseAPI *handle,
|
||||
BOOL raw_image, int raw_padding,
|
||||
struct Pixa **pixa,
|
||||
int **blockids, int **paraids);
|
||||
TESS_API struct Boxa *TessBaseAPIGetStrips(TessBaseAPI *handle,
|
||||
struct Pixa **pixa, int **blockids);
|
||||
TESS_API struct Boxa *TessBaseAPIGetWords(TessBaseAPI *handle,
|
||||
struct Pixa **pixa);
|
||||
TESS_API struct Boxa *TessBaseAPIGetConnectedComponents(TessBaseAPI *handle,
|
||||
struct Pixa **cc);
|
||||
TESS_API struct Boxa *TessBaseAPIGetComponentImages(TessBaseAPI *handle,
|
||||
TessPageIteratorLevel level,
|
||||
BOOL text_only,
|
||||
struct Pixa **pixa,
|
||||
int **blockids);
|
||||
TESS_API struct Boxa *TessBaseAPIGetComponentImages1(
|
||||
TessBaseAPI *handle, TessPageIteratorLevel level, BOOL text_only,
|
||||
BOOL raw_image, int raw_padding, struct Pixa **pixa, int **blockids,
|
||||
int **paraids);
|
||||
|
||||
TESS_API int TessBaseAPIGetThresholdedImageScaleFactor(
|
||||
const TessBaseAPI *handle);
|
||||
|
||||
TESS_API TessPageIterator *TessBaseAPIAnalyseLayout(TessBaseAPI *handle);
|
||||
|
||||
TESS_API int TessBaseAPIRecognize(TessBaseAPI *handle, ETEXT_DESC *monitor);
|
||||
|
||||
TESS_API BOOL TessBaseAPIProcessPages(TessBaseAPI *handle, const char *filename,
|
||||
const char *retry_config,
|
||||
int timeout_millisec,
|
||||
TessResultRenderer *renderer);
|
||||
TESS_API BOOL TessBaseAPIProcessPage(TessBaseAPI *handle, struct Pix *pix,
|
||||
int page_index, const char *filename,
|
||||
const char *retry_config,
|
||||
int timeout_millisec,
|
||||
TessResultRenderer *renderer);
|
||||
|
||||
TESS_API TessResultIterator *TessBaseAPIGetIterator(TessBaseAPI *handle);
|
||||
TESS_API TessMutableIterator *TessBaseAPIGetMutableIterator(
|
||||
TessBaseAPI *handle);
|
||||
|
||||
TESS_API char *TessBaseAPIGetUTF8Text(TessBaseAPI *handle);
|
||||
TESS_API char *TessBaseAPIGetHOCRText(TessBaseAPI *handle, int page_number);
|
||||
|
||||
TESS_API char *TessBaseAPIGetAltoText(TessBaseAPI *handle, int page_number);
|
||||
TESS_API char *TessBaseAPIGetTsvText(TessBaseAPI *handle, int page_number);
|
||||
|
||||
TESS_API char *TessBaseAPIGetBoxText(TessBaseAPI *handle, int page_number);
|
||||
TESS_API char *TessBaseAPIGetLSTMBoxText(TessBaseAPI *handle, int page_number);
|
||||
TESS_API char *TessBaseAPIGetWordStrBoxText(TessBaseAPI *handle,
|
||||
int page_number);
|
||||
|
||||
TESS_API char *TessBaseAPIGetUNLVText(TessBaseAPI *handle);
|
||||
TESS_API int TessBaseAPIMeanTextConf(TessBaseAPI *handle);
|
||||
|
||||
TESS_API int *TessBaseAPIAllWordConfidences(TessBaseAPI *handle);
|
||||
|
||||
#ifndef DISABLED_LEGACY_ENGINE
|
||||
TESS_API BOOL TessBaseAPIAdaptToWordStr(TessBaseAPI *handle,
|
||||
TessPageSegMode mode,
|
||||
const char *wordstr);
|
||||
#endif // #ifndef DISABLED_LEGACY_ENGINE
|
||||
|
||||
TESS_API void TessBaseAPIClear(TessBaseAPI *handle);
|
||||
TESS_API void TessBaseAPIEnd(TessBaseAPI *handle);
|
||||
|
||||
TESS_API int TessBaseAPIIsValidWord(TessBaseAPI *handle, const char *word);
|
||||
TESS_API BOOL TessBaseAPIGetTextDirection(TessBaseAPI *handle, int *out_offset,
|
||||
float *out_slope);
|
||||
|
||||
TESS_API const char *TessBaseAPIGetUnichar(TessBaseAPI *handle, int unichar_id);
|
||||
|
||||
TESS_API void TessBaseAPIClearPersistentCache(TessBaseAPI *handle);
|
||||
|
||||
#ifndef DISABLED_LEGACY_ENGINE
|
||||
|
||||
// Call TessDeleteText(*script_name) to free memory allocated by this
|
||||
// function
|
||||
TESS_API BOOL TessBaseAPIDetectOrientationScript(TessBaseAPI *handle,
|
||||
int *orient_deg,
|
||||
float *orient_conf,
|
||||
const char **script_name,
|
||||
float *script_conf);
|
||||
#endif // #ifndef DISABLED_LEGACY_ENGINE
|
||||
|
||||
TESS_API void TessBaseAPISetMinOrientationMargin(TessBaseAPI *handle,
|
||||
double margin);
|
||||
|
||||
TESS_API int TessBaseAPINumDawgs(const TessBaseAPI *handle);
|
||||
|
||||
TESS_API TessOcrEngineMode TessBaseAPIOem(const TessBaseAPI *handle);
|
||||
|
||||
TESS_API void TessBaseGetBlockTextOrientations(TessBaseAPI *handle,
|
||||
int **block_orientation,
|
||||
bool **vertical_writing);
|
||||
|
||||
/* Page iterator */
|
||||
|
||||
TESS_API void TessPageIteratorDelete(TessPageIterator *handle);
|
||||
|
||||
TESS_API TessPageIterator *TessPageIteratorCopy(const TessPageIterator *handle);
|
||||
|
||||
TESS_API void TessPageIteratorBegin(TessPageIterator *handle);
|
||||
|
||||
TESS_API BOOL TessPageIteratorNext(TessPageIterator *handle,
|
||||
TessPageIteratorLevel level);
|
||||
|
||||
TESS_API BOOL TessPageIteratorIsAtBeginningOf(const TessPageIterator *handle,
|
||||
TessPageIteratorLevel level);
|
||||
|
||||
TESS_API BOOL TessPageIteratorIsAtFinalElement(const TessPageIterator *handle,
|
||||
TessPageIteratorLevel level,
|
||||
TessPageIteratorLevel element);
|
||||
|
||||
TESS_API BOOL TessPageIteratorBoundingBox(const TessPageIterator *handle,
|
||||
TessPageIteratorLevel level,
|
||||
int *left, int *top, int *right,
|
||||
int *bottom);
|
||||
|
||||
TESS_API TessPolyBlockType
|
||||
TessPageIteratorBlockType(const TessPageIterator *handle);
|
||||
|
||||
TESS_API struct Pix *TessPageIteratorGetBinaryImage(
|
||||
const TessPageIterator *handle, TessPageIteratorLevel level);
|
||||
|
||||
TESS_API struct Pix *TessPageIteratorGetImage(const TessPageIterator *handle,
|
||||
TessPageIteratorLevel level,
|
||||
int padding,
|
||||
struct Pix *original_image,
|
||||
int *left, int *top);
|
||||
|
||||
TESS_API BOOL TessPageIteratorBaseline(const TessPageIterator *handle,
|
||||
TessPageIteratorLevel level, int *x1,
|
||||
int *y1, int *x2, int *y2);
|
||||
|
||||
TESS_API void TessPageIteratorOrientation(
|
||||
TessPageIterator *handle, TessOrientation *orientation,
|
||||
TessWritingDirection *writing_direction, TessTextlineOrder *textline_order,
|
||||
float *deskew_angle);
|
||||
|
||||
TESS_API void TessPageIteratorParagraphInfo(
|
||||
TessPageIterator *handle, TessParagraphJustification *justification,
|
||||
BOOL *is_list_item, BOOL *is_crown, int *first_line_indent);
|
||||
|
||||
/* Result iterator */
|
||||
|
||||
TESS_API void TessResultIteratorDelete(TessResultIterator *handle);
|
||||
TESS_API TessResultIterator *TessResultIteratorCopy(
|
||||
const TessResultIterator *handle);
|
||||
TESS_API TessPageIterator *TessResultIteratorGetPageIterator(
|
||||
TessResultIterator *handle);
|
||||
TESS_API const TessPageIterator *TessResultIteratorGetPageIteratorConst(
|
||||
const TessResultIterator *handle);
|
||||
TESS_API TessChoiceIterator *TessResultIteratorGetChoiceIterator(
|
||||
const TessResultIterator *handle);
|
||||
|
||||
TESS_API BOOL TessResultIteratorNext(TessResultIterator *handle,
|
||||
TessPageIteratorLevel level);
|
||||
TESS_API char *TessResultIteratorGetUTF8Text(const TessResultIterator *handle,
|
||||
TessPageIteratorLevel level);
|
||||
TESS_API float TessResultIteratorConfidence(const TessResultIterator *handle,
|
||||
TessPageIteratorLevel level);
|
||||
TESS_API const char *TessResultIteratorWordRecognitionLanguage(
|
||||
const TessResultIterator *handle);
|
||||
TESS_API const char *TessResultIteratorWordFontAttributes(
|
||||
const TessResultIterator *handle, BOOL *is_bold, BOOL *is_italic,
|
||||
BOOL *is_underlined, BOOL *is_monospace, BOOL *is_serif, BOOL *is_smallcaps,
|
||||
int *pointsize, int *font_id);
|
||||
|
||||
TESS_API BOOL
|
||||
TessResultIteratorWordIsFromDictionary(const TessResultIterator *handle);
|
||||
TESS_API BOOL TessResultIteratorWordIsNumeric(const TessResultIterator *handle);
|
||||
TESS_API BOOL
|
||||
TessResultIteratorSymbolIsSuperscript(const TessResultIterator *handle);
|
||||
TESS_API BOOL
|
||||
TessResultIteratorSymbolIsSubscript(const TessResultIterator *handle);
|
||||
TESS_API BOOL
|
||||
TessResultIteratorSymbolIsDropcap(const TessResultIterator *handle);
|
||||
|
||||
TESS_API void TessChoiceIteratorDelete(TessChoiceIterator *handle);
|
||||
TESS_API BOOL TessChoiceIteratorNext(TessChoiceIterator *handle);
|
||||
TESS_API const char *TessChoiceIteratorGetUTF8Text(
|
||||
const TessChoiceIterator *handle);
|
||||
TESS_API float TessChoiceIteratorConfidence(const TessChoiceIterator *handle);
|
||||
|
||||
/* Progress monitor */
|
||||
|
||||
/* Takes no arguments and returns an ETEXT_DESC *. Spelled (void) so that C
 * translation units see a real prototype rather than an unspecified
 * parameter list; the meaning in C++ is unchanged. */
TESS_API ETEXT_DESC *TessMonitorCreate(void);
|
||||
TESS_API void TessMonitorDelete(ETEXT_DESC *monitor);
|
||||
TESS_API void TessMonitorSetCancelFunc(ETEXT_DESC *monitor,
|
||||
TessCancelFunc cancelFunc);
|
||||
TESS_API void TessMonitorSetCancelThis(ETEXT_DESC *monitor, void *cancelThis);
|
||||
TESS_API void *TessMonitorGetCancelThis(ETEXT_DESC *monitor);
|
||||
TESS_API void TessMonitorSetProgressFunc(ETEXT_DESC *monitor,
|
||||
TessProgressFunc progressFunc);
|
||||
TESS_API int TessMonitorGetProgress(ETEXT_DESC *monitor);
|
||||
TESS_API void TessMonitorSetDeadlineMSecs(ETEXT_DESC *monitor, int deadline);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // API_CAPI_H_
|
|
@ -1,37 +0,0 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: export.h
|
||||
// Description: Place holder
|
||||
//
|
||||
// (C) Copyright 2006, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef TESSERACT_PLATFORM_H_
|
||||
#define TESSERACT_PLATFORM_H_
|
||||
|
||||
// Selects the definition of TESS_API, unless the includer already defined it:
//   _WIN32 or __CYGWIN__:
//     TESS_EXPORTS defined      -> __declspec(dllexport)
//     else TESS_IMPORTS defined -> __declspec(dllimport)
//     neither                   -> empty
//   any other platform:
//     TESS_EXPORTS or TESS_IMPORTS defined -> __attribute__((visibility("default")))
//     neither                              -> empty
#ifndef TESS_API
|
||||
# if defined(_WIN32) || defined(__CYGWIN__)
|
||||
# if defined(TESS_EXPORTS)
|
||||
# define TESS_API __declspec(dllexport)
|
||||
# elif defined(TESS_IMPORTS)
|
||||
# define TESS_API __declspec(dllimport)
|
||||
# else
|
||||
# define TESS_API
|
||||
# endif
|
||||
# else
|
||||
# if defined(TESS_EXPORTS) || defined(TESS_IMPORTS)
|
||||
# define TESS_API __attribute__((visibility("default")))
|
||||
# else
|
||||
# define TESS_API
|
||||
# endif
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#endif // TESSERACT_PLATFORM_H_
|
|
@ -1,235 +0,0 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: ltrresultiterator.h
|
||||
// Description: Iterator for tesseract results in strict left-to-right
|
||||
// order that avoids using tesseract internal data structures.
|
||||
// Author: Ray Smith
|
||||
//
|
||||
// (C) Copyright 2010, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H_
|
||||
#define TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H_
|
||||
|
||||
#include "export.h" // for TESS_API
|
||||
#include "pageiterator.h" // for PageIterator
|
||||
#include "publictypes.h" // for PageIteratorLevel
|
||||
#include "unichar.h" // for StrongScriptDirection
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
class BLOB_CHOICE_IT;
|
||||
class PAGE_RES;
|
||||
class WERD_RES;
|
||||
|
||||
class Tesseract;
|
||||
|
||||
// Class to iterate over tesseract results, providing access to all levels
|
||||
// of the page hierarchy, without including any tesseract headers or having
|
||||
// to handle any tesseract structures.
|
||||
// WARNING! This class points to data held within the TessBaseAPI class, and
|
||||
// therefore can only be used while the TessBaseAPI class still exists and
|
||||
// has not been subjected to a call of Init, SetImage, Recognize, Clear, End,
|
||||
// DetectOS, or anything else that changes the internal PAGE_RES.
|
||||
// See tesseract/publictypes.h for the definition of PageIteratorLevel.
|
||||
// See also base class PageIterator, which contains the bulk of the interface.
|
||||
// LTRResultIterator adds text-specific methods for access to OCR output.
|
||||
|
||||
// Adds text-specific accessors for OCR output on top of PageIterator.
// Only declarations are visible here; the definitions live elsewhere.
class TESS_API LTRResultIterator : public PageIterator {
  friend class ChoiceIterator;

public:
  // page_res and tesseract come directly from the BaseAPI.
  // The rectangle parameters are copied indirectly from the Thresholder,
  // via the BaseAPI. They represent the coordinates of some rectangle in an
  // original image (in top-left-origin coordinates) and therefore the top-left
  // needs to be added to any output boxes in order to specify coordinates
  // in the original image. See TessBaseAPI::SetRectangle.
  // The scale and scaled_yres are in case the Thresholder scaled the image
  // rectangle prior to thresholding. Any coordinates in tesseract's image
  // must be divided by scale before adding (rect_left, rect_top).
  // The scaled_yres indicates the effective resolution of the binary image
  // that tesseract has been given by the Thresholder.
  // After the constructor, Begin has already been called.
  LTRResultIterator(PAGE_RES *page_res, Tesseract *tesseract, int scale,
                    int scaled_yres, int rect_left, int rect_top,
                    int rect_width, int rect_height);

  ~LTRResultIterator() override;

  // LTRResultIterators may be copied! This makes it possible to iterate over
  // all the objects at a lower level, while maintaining an iterator to
  // objects at a higher level. These constructors DO NOT CALL Begin, so
  // iterations will continue from the location of src.
  // TODO: For now the copy constructor and operator= only need the base class
  // versions, but if new data members are added, don't forget to add them!

  // ============= Moving around within the page ============.

  // See PageIterator.

  // ============= Accessing data ==============.

  // Returns the null terminated UTF-8 encoded text string for the current
  // object at the given level. Use delete [] to free after use.
  char *GetUTF8Text(PageIteratorLevel level) const;

  // Set the string inserted at the end of each text line. "\n" by default.
  void SetLineSeparator(const char *new_line);

  // Set the string inserted at the end of each paragraph. "\n" by default.
  void SetParagraphSeparator(const char *new_para);

  // Returns the mean confidence of the current object at the given level.
  // The number should be interpreted as a percent probability. (0.0f-100.0f)
  float Confidence(PageIteratorLevel level) const;

  // ============= Functions that refer to words only ============.

  // Returns the font attributes of the current word. If iterating at a higher
  // level object than words, eg textlines, then this will return the
  // attributes of the first word in that textline.
  // The actual return value is a string representing a font name. It points
  // to an internal table and SHOULD NOT BE DELETED. Lifespan is the same as
  // the iterator itself, ie rendered invalid by various members of
  // TessBaseAPI, including Init, SetImage, End or deleting the TessBaseAPI.
  // Pointsize is returned in printers points (1/72 inch.)
  const char *WordFontAttributes(bool *is_bold, bool *is_italic,
                                 bool *is_underlined, bool *is_monospace,
                                 bool *is_serif, bool *is_smallcaps,
                                 int *pointsize, int *font_id) const;

  // Return the name of the language used to recognize this word.
  // On error, nullptr. Do not delete this pointer.
  const char *WordRecognitionLanguage() const;

  // Return the overall directionality of this word.
  StrongScriptDirection WordDirection() const;

  // Returns true if the current word was found in a dictionary.
  bool WordIsFromDictionary() const;

  // Returns the number of blanks before the current word.
  int BlanksBeforeWord() const;

  // Returns true if the current word is numeric.
  bool WordIsNumeric() const;

  // Returns true if the word contains blamer information.
  bool HasBlamerInfo() const;

  // Returns the pointer to ParamsTrainingBundle stored in the BlamerBundle
  // of the current word.
  const void *GetParamsTrainingBundle() const;

  // Returns a pointer to the string with blamer information for this word.
  // Assumes that the word's blamer_bundle is not nullptr.
  const char *GetBlamerDebug() const;

  // Returns a pointer to the string with misadaption information for this
  // word. Assumes that the word's blamer_bundle is not nullptr.
  const char *GetBlamerMisadaptionDebug() const;

  // Returns true if a truth string was recorded for the current word.
  bool HasTruthString() const;

  // Returns true if the given string is equivalent to the truth string for
  // the current word.
  bool EquivalentToTruth(const char *str) const;

  // Returns a null terminated UTF-8 encoded truth string for the current word.
  // Use delete [] to free after use.
  char *WordTruthUTF8Text() const;

  // Returns a null terminated UTF-8 encoded normalized OCR string for the
  // current word. Use delete [] to free after use.
  char *WordNormedUTF8Text() const;

  // Returns a pointer to serialized choice lattice.
  // Fills lattice_size with the number of bytes in lattice data.
  const char *WordLattice(int *lattice_size) const;

  // ============= Functions that refer to symbols only ============.

  // Returns true if the current symbol is a superscript.
  // If iterating at a higher level object than symbols, eg words, then
  // this will return the attributes of the first symbol in that word.
  bool SymbolIsSuperscript() const;

  // Returns true if the current symbol is a subscript.
  // If iterating at a higher level object than symbols, eg words, then
  // this will return the attributes of the first symbol in that word.
  bool SymbolIsSubscript() const;

  // Returns true if the current symbol is a dropcap.
  // If iterating at a higher level object than symbols, eg words, then
  // this will return the attributes of the first symbol in that word.
  bool SymbolIsDropcap() const;

protected:
  // Separator strings set through SetLineSeparator / SetParagraphSeparator.
  // NOTE(review): these are raw, presumably non-owning pointers, so the
  // caller's strings must outlive the iterator - confirm in the definition.
  const char *line_separator_;
  const char *paragraph_separator_;
};
|
||||
|
||||
// Class to iterate over the classifier choices for a single RIL_SYMBOL.
|
||||
class TESS_API ChoiceIterator {
|
||||
public:
|
||||
// Construction is from a LTRResultIterator that points to the symbol of
|
||||
// interest. The ChoiceIterator allows a one-shot iteration over the
|
||||
// choices for this symbol and after that it is useless.
|
||||
explicit ChoiceIterator(const LTRResultIterator &result_it);
|
||||
~ChoiceIterator();
|
||||
|
||||
// Moves to the next choice for the symbol and returns false if there
|
||||
// are none left.
|
||||
bool Next();
|
||||
|
||||
// ============= Accessing data ==============.
|
||||
|
||||
// Returns the null terminated UTF-8 encoded text string for the current
|
||||
// choice.
|
||||
// NOTE: Unlike LTRResultIterator::GetUTF8Text, the return points to an
|
||||
// internal structure and should NOT be delete[]ed to free after use.
|
||||
const char *GetUTF8Text() const;
|
||||
|
||||
// Returns the confidence of the current choice depending on the used language
|
||||
// data. If only LSTM traineddata is used the value range is 0.0f - 1.0f. All
|
||||
// choices for one symbol should roughly add up to 1.0f.
|
||||
// If only traineddata of the legacy engine is used, the number should be
|
||||
// interpreted as a percent probability. (0.0f-100.0f) In this case
|
||||
// probabilities won't add up to 100. Each one stands on its own.
|
||||
float Confidence() const;
|
||||
|
||||
// Returns a vector containing all timesteps, which belong to the currently
|
||||
// selected symbol. A timestep is a vector containing pairs of symbols and
|
||||
// floating point numbers. The number states the probability for the
|
||||
// corresponding symbol.
|
||||
std::vector<std::vector<std::pair<const char *, float>>> *Timesteps() const;
|
||||
|
||||
private:
|
||||
// clears the remaining spaces out of the results and adapt the probabilities
|
||||
void filterSpaces();
|
||||
// Pointer to the WERD_RES object owned by the API.
|
||||
WERD_RES *word_res_;
|
||||
// Iterator over the blob choices.
|
||||
BLOB_CHOICE_IT *choice_it_;
|
||||
std::vector<std::pair<const char *, float>> *LSTM_choices_ = nullptr;
|
||||
std::vector<std::pair<const char *, float>>::iterator LSTM_choice_it_;
|
||||
|
||||
const int *tstep_index_;
|
||||
// regulates the rating granularity
|
||||
double rating_coefficient_;
|
||||
// leading blanks
|
||||
int blanks_before_word_;
|
||||
// true when there is lstm engine related trained data
|
||||
bool oemLSTM_;
|
||||
};
|
||||
|
||||
} // namespace tesseract.
|
||||
|
||||
#endif // TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H_
|
|
@ -1,158 +0,0 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
/**********************************************************************
|
||||
* File: ocrclass.h
|
||||
* Description: Class definitions and constants for the OCR API.
|
||||
* Author: Hewlett-Packard Co
|
||||
*
|
||||
* (C) Copyright 1996, Hewlett-Packard Co.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
/**********************************************************************
|
||||
* This file contains typedefs for all the structures used by
|
||||
* the HP OCR interface.
|
||||
* The structures are designed to allow them to be used with any
|
||||
* structure alignment up to 8.
|
||||
**********************************************************************/
|
||||
|
||||
#ifndef CCUTIL_OCRCLASS_H_
|
||||
#define CCUTIL_OCRCLASS_H_
|
||||
|
||||
#include <chrono>
|
||||
#include <ctime>
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
/**********************************************************************
|
||||
* EANYCODE_CHAR
|
||||
* Description of a single character. The character code is defined by
|
||||
* the character set of the current font.
|
||||
* Output text is sent as an array of these structures.
|
||||
* Spaces and line endings in the output are represented in the
|
||||
* structures of the surrounding characters. They are not directly
|
||||
* represented as characters.
|
||||
* The first character in a word has a positive value of blanks.
|
||||
* Missing information should be set to the defaults in the comments.
|
||||
* If word bounds are known, but not character bounds, then the top and
|
||||
* bottom of each character should be those of the word. The left of the
|
||||
* first and right of the last char in each word should be set. All other
|
||||
* lefts and rights should be set to -1.
|
||||
* If set, the values of right and bottom are left+width and top+height.
|
||||
* Most of the members come directly from the parameters to ocr_append_char.
|
||||
* The formatting member uses the enhancement parameter and combines the
|
||||
* line direction stuff into the top 3 bits.
|
||||
* The coding is 0=RL char, 1=LR char, 2=DR NL, 3=UL NL, 4=DR Para,
|
||||
* 5=UL Para, 6=TB char, 7=BT char. API users do not need to know what
|
||||
* the coding is, only that it is backwards compatible with the previous
|
||||
* version.
|
||||
**********************************************************************/
|
||||
|
||||
struct EANYCODE_CHAR { /* single character */
  // From version 2.0 onwards the format of char_code is UTF-8: an ASCII
  // character comes out as one structure, while any other character is
  // returned in two or more instances of this structure, each holding a single
  // byte of the UTF-8 code and all sharing the same bounding box. Programs
  // which want to handle languages with different character sets will need to
  // handle extended characters appropriately, but *all* code needs to be
  // prepared to receive UTF-8 coded characters for characters such as bullet
  // and fancy quotes.
  // The value in parentheses after each field is the default to use when the
  // information is missing.
  uint16_t char_code; // character itself
  int16_t left;       // of char (-1)
  int16_t right;      // of char (-1)
  int16_t top;        // of char (-1)
  int16_t bottom;     // of char (-1)
  int16_t font_index; // what font (0)
  uint8_t confidence; // 0=perfect, 100=reject (0/100)
  uint8_t point_size; // of char, 72 = 1 inch (10)
  int8_t blanks;      // number of spaces before this char (1)
  uint8_t formatting; // char formatting (0)
};

/**********************************************************************
 * ETEXT_DESC
 * Description of the output of the OCR engine.
 * This structure is used as both a progress monitor and the final
 * output header, since it needs to be a valid progress monitor while
 * the OCR engine is storing its output to shared memory.
 * During progress, all the buffer info is -1.
 * Progress starts at 0 and increases to 100 during OCR. No other constraint.
 * Additionally the progress callback contains the bounding box of the word that
 * is currently being processed.
 * Every progress callback, the OCR engine must set ocr_alive to 1.
 * The HP side will set ocr_alive to 0. Repeated failure to reset
 * to 1 indicates that the OCR engine is dead.
 * If the cancel function is not null then it is called with the number of
 * user words found. If it returns true then operation is cancelled.
 **********************************************************************/
class ETEXT_DESC;

// Callback signatures used by ETEXT_DESC.
using CANCEL_FUNC = bool (*)(void *, int);
using PROGRESS_FUNC = bool (*)(int, int, int, int, int);
using PROGRESS_FUNC2 = bool (*)(ETEXT_DESC *, int, int, int, int);

class ETEXT_DESC { // output header
public:
  int16_t count{0};    ///< chars in this buffer (0)
  int16_t progress{0}; ///< percent complete, increasing (0-100)
  /** Progress monitor covers word recognition and it does not cover layout
   * analysis.
   * See Ray comment in https://github.com/tesseract-ocr/tesseract/pull/27 */
  int8_t more_to_come{0};                   ///< true if not last
  volatile int8_t ocr_alive{0};             ///< ocr sets to 1, HP 0
  int8_t err_code{0};                       ///< for errcode use
  CANCEL_FUNC cancel{nullptr};              ///< returns true to cancel
  PROGRESS_FUNC progress_callback{nullptr}; ///< called whenever progress increases
  PROGRESS_FUNC2 progress_callback2;        ///< monitor-aware progress callback
  void *cancel_this{nullptr};               ///< this or other data for cancel
  /// Time to stop. Expected to be set only by call to set_deadline_msecs().
  std::chrono::steady_clock::time_point end_time;
  EANYCODE_CHAR text[1]{}; ///< character data

  /// Installs the default monitor-aware callback and resets end_time to the
  /// clock's epoch, which deadline_exceeded() treats as "no deadline set".
  ETEXT_DESC() : progress_callback2(&default_progress_func) {
    end_time = std::chrono::time_point<std::chrono::steady_clock,
                                       std::chrono::milliseconds>();
  }

  /// Sets the end time to be deadline_msecs milliseconds from now.
  /// A value of zero or less leaves the current end time untouched.
  void set_deadline_msecs(int32_t deadline_msecs) {
    if (deadline_msecs > 0) {
      end_time = std::chrono::steady_clock::now() +
                 std::chrono::milliseconds(deadline_msecs);
    }
  }

  /// Returns false if we've not passed the end_time, or have not set a
  /// deadline.
  bool deadline_exceeded() const {
    // An end_time sitting exactly on the epoch means no deadline was set.
    if (end_time.time_since_epoch() ==
        std::chrono::steady_clock::duration::zero()) {
      return false;
    }
    auto now = std::chrono::steady_clock::now();
    return (now > end_time);
  }

private:
  /// Default for progress_callback2: forwards to the legacy progress_callback
  /// (passing the monitor's current progress) when one is installed, and
  /// otherwise returns true.
  static bool default_progress_func(ETEXT_DESC *ths, int left, int right,
                                    int top, int bottom) {
    if (ths->progress_callback != nullptr) {
      return (*(ths->progress_callback))(ths->progress, left, right, top,
                                         bottom);
    }
    return true;
  }
};
|
||||
|
||||
} // namespace tesseract
|
||||
|
||||
#endif // CCUTIL_OCRCLASS_H_
|
|
@ -1,139 +0,0 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: osdetect.h
|
||||
// Description: Orientation and script detection.
|
||||
// Author: Samuel Charron
|
||||
// Ranjith Unnikrishnan
|
||||
//
|
||||
// (C) Copyright 2008, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef TESSERACT_CCMAIN_OSDETECT_H_
|
||||
#define TESSERACT_CCMAIN_OSDETECT_H_
|
||||
|
||||
#include "export.h" // for TESS_API
|
||||
|
||||
#include <vector> // for std::vector
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Forward declarations of types that this header only uses through pointers
// or references.
class BLOBNBOX;
class BLOBNBOX_CLIST;
class BLOB_CHOICE_LIST;
class TO_BLOCK_LIST;
class UNICHARSET;

class Tesseract;
|
||||
|
||||
// Max number of scripts in ICU + "NULL" + Japanese and Korean + Fraktur.
// constexpr so the value is guaranteed usable as an array bound.
constexpr int kMaxNumberOfScripts = 116 + 1 + 2 + 1;
|
||||
|
||||
// Holds one orientation id / script id pair together with two confidence
// values. Every field starts at zero.
struct OSBestResult {
  OSBestResult()
      : orientation_id(0), script_id(0), sconfidence(0.0f), oconfidence(0.0f) {}
  int orientation_id;
  int script_id;
  // NOTE(review): presumably the script and orientation confidences
  // respectively - confirm against the code that fills them in.
  float sconfidence;
  float oconfidence;
};
|
||||
|
||||
struct OSResults {
|
||||
OSResults() : unicharset(nullptr) {
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
for (int j = 0; j < kMaxNumberOfScripts; ++j) {
|
||||
scripts_na[i][j] = 0;
|
||||
}
|
||||
orientations[i] = 0;
|
||||
}
|
||||
}
|
||||
void update_best_orientation();
|
||||
// Set the estimate of the orientation to the given id.
|
||||
void set_best_orientation(int orientation_id);
|
||||
// Update/Compute the best estimate of the script assuming the given
|
||||
// orientation id.
|
||||
void update_best_script(int orientation_id);
|
||||
// Return the index of the script with the highest score for this orientation.
|
||||
TESS_API int get_best_script(int orientation_id) const;
|
||||
// Accumulate scores with given OSResults instance and update the best script.
|
||||
void accumulate(const OSResults &osr);
|
||||
|
||||
// Print statistics.
|
||||
void print_scores(void) const;
|
||||
void print_scores(int orientation_id) const;
|
||||
|
||||
// Array holding scores for each orientation id [0,3].
|
||||
// Orientation ids [0..3] map to [0, 270, 180, 90] degree orientations of the
|
||||
// page respectively, where the values refer to the amount of clockwise
|
||||
// rotation to be applied to the page for the text to be upright and readable.
|
||||
float orientations[4];
|
||||
// Script confidence scores for each of 4 possible orientations.
|
||||
float scripts_na[4][kMaxNumberOfScripts];
|
||||
|
||||
UNICHARSET *unicharset;
|
||||
OSBestResult best_result;
|
||||
};
|
||||
|
||||
class OrientationDetector {
|
||||
public:
|
||||
OrientationDetector(const std::vector<int> *allowed_scripts,
|
||||
OSResults *results);
|
||||
bool detect_blob(BLOB_CHOICE_LIST *scores);
|
||||
int get_orientation();
|
||||
|
||||
private:
|
||||
OSResults *osr_;
|
||||
const std::vector<int> *allowed_scripts_;
|
||||
};
|
||||
|
||||
class ScriptDetector {
|
||||
public:
|
||||
ScriptDetector(const std::vector<int> *allowed_scripts, OSResults *osr,
|
||||
tesseract::Tesseract *tess);
|
||||
void detect_blob(BLOB_CHOICE_LIST *scores);
|
||||
bool must_stop(int orientation) const;
|
||||
|
||||
private:
|
||||
OSResults *osr_;
|
||||
static const char *korean_script_;
|
||||
static const char *japanese_script_;
|
||||
static const char *fraktur_script_;
|
||||
int korean_id_;
|
||||
int japanese_id_;
|
||||
int katakana_id_;
|
||||
int hiragana_id_;
|
||||
int han_id_;
|
||||
int hangul_id_;
|
||||
int latin_id_;
|
||||
int fraktur_id_;
|
||||
tesseract::Tesseract *tess_;
|
||||
const std::vector<int> *allowed_scripts_;
|
||||
};
|
||||
|
||||
int orientation_and_script_detection(const char *filename, OSResults *,
|
||||
tesseract::Tesseract *);
|
||||
|
||||
int os_detect(TO_BLOCK_LIST *port_blocks, OSResults *osr,
|
||||
tesseract::Tesseract *tess);
|
||||
|
||||
int os_detect_blobs(const std::vector<int> *allowed_scripts,
|
||||
BLOBNBOX_CLIST *blob_list, OSResults *osr,
|
||||
tesseract::Tesseract *tess);
|
||||
|
||||
bool os_detect_blob(BLOBNBOX *bbox, OrientationDetector *o, ScriptDetector *s,
|
||||
OSResults *, tesseract::Tesseract *tess);
|
||||
|
||||
// Helper method to convert an orientation index to its value in degrees.
|
||||
// The value represents the amount of clockwise rotation in degrees that must be
|
||||
// applied for the text to be upright (readable).
|
||||
TESS_API int OrientationIdToValue(const int &id);
|
||||
|
||||
} // namespace tesseract
|
||||
|
||||
#endif // TESSERACT_CCMAIN_OSDETECT_H_
|
|
@ -1,364 +0,0 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: pageiterator.h
|
||||
// Description: Iterator for tesseract page structure that avoids using
|
||||
// tesseract internal data structures.
|
||||
// Author: Ray Smith
|
||||
//
|
||||
// (C) Copyright 2010, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef TESSERACT_CCMAIN_PAGEITERATOR_H_
|
||||
#define TESSERACT_CCMAIN_PAGEITERATOR_H_
|
||||
|
||||
#include "export.h"
|
||||
#include "publictypes.h"
|
||||
|
||||
// Image and point-array types, used in this header only through pointers.
struct Pix;
struct Pta;
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Forward declarations of types that this header only uses through pointers
// or references.
struct BlamerBundle;
class C_BLOB_IT;
class PAGE_RES;
class PAGE_RES_IT;
class WERD;

class Tesseract;
|
||||
|
||||
/**
|
||||
* Class to iterate over tesseract page structure, providing access to all
|
||||
* levels of the page hierarchy, without including any tesseract headers or
|
||||
* having to handle any tesseract structures.
|
||||
* WARNING! This class points to data held within the TessBaseAPI class, and
|
||||
* therefore can only be used while the TessBaseAPI class still exists and
|
||||
* has not been subjected to a call of Init, SetImage, Recognize, Clear, End,
|
||||
* DetectOS, or anything else that changes the internal PAGE_RES.
|
||||
* See tesseract/publictypes.h for the definition of PageIteratorLevel.
|
||||
* See also ResultIterator, derived from PageIterator, which adds in the
|
||||
* ability to access OCR output with text-specific methods.
|
||||
*/
|
||||
|
||||
class TESS_API PageIterator {
|
||||
public:
|
||||
/**
|
||||
* page_res and tesseract come directly from the BaseAPI.
|
||||
* The rectangle parameters are copied indirectly from the Thresholder,
|
||||
* via the BaseAPI. They represent the coordinates of some rectangle in an
|
||||
* original image (in top-left-origin coordinates) and therefore the top-left
|
||||
* needs to be added to any output boxes in order to specify coordinates
|
||||
* in the original image. See TessBaseAPI::SetRectangle.
|
||||
* The scale and scaled_yres are in case the Thresholder scaled the image
|
||||
* rectangle prior to thresholding. Any coordinates in tesseract's image
|
||||
* must be divided by scale before adding (rect_left, rect_top).
|
||||
* The scaled_yres indicates the effective resolution of the binary image
|
||||
* that tesseract has been given by the Thresholder.
|
||||
* After the constructor, Begin has already been called.
|
||||
*/
|
||||
PageIterator(PAGE_RES *page_res, Tesseract *tesseract, int scale,
|
||||
int scaled_yres, int rect_left, int rect_top, int rect_width,
|
||||
int rect_height);
|
||||
virtual ~PageIterator();
|
||||
|
||||
/**
|
||||
* Page/ResultIterators may be copied! This makes it possible to iterate over
|
||||
* all the objects at a lower level, while maintaining an iterator to
|
||||
* objects at a higher level. These constructors DO NOT CALL Begin, so
|
||||
* iterations will continue from the location of src.
|
||||
*/
|
||||
PageIterator(const PageIterator &src);
|
||||
const PageIterator &operator=(const PageIterator &src);
|
||||
|
||||
/** Are we positioned at the same location as other? */
|
||||
bool PositionedAtSameWord(const PAGE_RES_IT *other) const;
|
||||
|
||||
// ============= Moving around within the page ============.
|
||||
|
||||
/**
|
||||
* Moves the iterator to point to the start of the page to begin an
|
||||
* iteration.
|
||||
*/
|
||||
virtual void Begin();
|
||||
|
||||
/**
|
||||
* Moves the iterator to the beginning of the paragraph.
|
||||
* This class implements this functionality by moving it to the zero indexed
|
||||
* blob of the first (leftmost) word on the first row of the paragraph.
|
||||
*/
|
||||
virtual void RestartParagraph();
|
||||
|
||||
/**
|
||||
* Return whether this iterator points anywhere in the first textline of a
|
||||
* paragraph.
|
||||
*/
|
||||
bool IsWithinFirstTextlineOfParagraph() const;
|
||||
|
||||
/**
|
||||
* Moves the iterator to the beginning of the text line.
|
||||
* This class implements this functionality by moving it to the zero indexed
|
||||
* blob of the first (leftmost) word of the row.
|
||||
*/
|
||||
virtual void RestartRow();
|
||||
|
||||
/**
|
||||
* Moves to the start of the next object at the given level in the
|
||||
* page hierarchy, and returns false if the end of the page was reached.
|
||||
* NOTE that RIL_SYMBOL will skip non-text blocks, but all other
|
||||
* PageIteratorLevel level values will visit each non-text block once.
|
||||
* Think of non text blocks as containing a single para, with a single line,
|
||||
* with a single imaginary word.
|
||||
* Calls to Next with different levels may be freely intermixed.
|
||||
* This function iterates words in right-to-left scripts correctly, if
|
||||
* the appropriate language has been loaded into Tesseract.
|
||||
*/
|
||||
virtual bool Next(PageIteratorLevel level);
|
||||
|
||||
/**
|
||||
* Returns true if the iterator is at the start of an object at the given
|
||||
* level.
|
||||
*
|
||||
* For instance, suppose an iterator it is pointed to the first symbol of the
|
||||
* first word of the third line of the second paragraph of the first block in
|
||||
* a page, then:
|
||||
* it.IsAtBeginningOf(RIL_BLOCK) = false
|
||||
* it.IsAtBeginningOf(RIL_PARA) = false
|
||||
* it.IsAtBeginningOf(RIL_TEXTLINE) = true
|
||||
* it.IsAtBeginningOf(RIL_WORD) = true
|
||||
* it.IsAtBeginningOf(RIL_SYMBOL) = true
|
||||
*/
|
||||
virtual bool IsAtBeginningOf(PageIteratorLevel level) const;
|
||||
|
||||
/**
|
||||
* Returns whether the iterator is positioned at the last element in a
|
||||
* given level. (e.g. the last word in a line, the last line in a block)
|
||||
*
|
||||
* Here's some two-paragraph example
|
||||
* text. It starts off innocuously
|
||||
* enough but quickly turns bizarre.
|
||||
* The author inserts a cornucopia
|
||||
* of words to guard against confused
|
||||
* references.
|
||||
*
|
||||
* Now take an iterator it pointed to the start of "bizarre."
|
||||
* it.IsAtFinalElement(RIL_PARA, RIL_SYMBOL) = false
|
||||
* it.IsAtFinalElement(RIL_PARA, RIL_WORD) = true
|
||||
* it.IsAtFinalElement(RIL_BLOCK, RIL_WORD) = false
|
||||
*/
|
||||
virtual bool IsAtFinalElement(PageIteratorLevel level,
|
||||
PageIteratorLevel element) const;
|
||||
|
||||
/**
|
||||
* Returns whether this iterator is positioned
|
||||
* before other: -1
|
||||
* equal to other: 0
|
||||
* after other: 1
|
||||
*/
|
||||
int Cmp(const PageIterator &other) const;
|
||||
|
||||
// ============= Accessing data ==============.
|
||||
// Coordinate system:
|
||||
// Integer coordinates are at the cracks between the pixels.
|
||||
// The top-left corner of the top-left pixel in the image is at (0,0).
|
||||
// The bottom-right corner of the bottom-right pixel in the image is at
|
||||
// (width, height).
|
||||
// Every bounding box goes from the top-left of the top-left contained
|
||||
// pixel to the bottom-right of the bottom-right contained pixel, so
|
||||
// the bounding box of the single top-left pixel in the image is:
|
||||
// (0,0)->(1,1).
|
||||
// If an image rectangle has been set in the API, then returned coordinates
|
||||
// relate to the original (full) image, rather than the rectangle.
|
||||
|
||||
/**
|
||||
* Controls what to include in a bounding box. Bounding boxes of all levels
|
||||
* between RIL_WORD and RIL_BLOCK can include or exclude potential diacritics.
|
||||
* Between layout analysis and recognition, it isn't known where all
|
||||
* diacritics belong, so this control is used to include or exclude some
|
||||
* diacritics that are above or below the main body of the word. In most cases
|
||||
* where the placement is obvious, and after recognition, it doesn't make as
|
||||
* much difference, as the diacritics will already be included in the word.
|
||||
*/
|
||||
void SetBoundingBoxComponents(bool include_upper_dots,
|
||||
bool include_lower_dots) {
|
||||
include_upper_dots_ = include_upper_dots;
|
||||
include_lower_dots_ = include_lower_dots;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the bounding rectangle of the current object at the given level.
|
||||
* See comment on coordinate system above.
|
||||
* Returns false if there is no such object at the current position.
|
||||
* The returned bounding box is guaranteed to match the size and position
|
||||
* of the image returned by GetBinaryImage, but may clip foreground pixels
|
||||
* from a grey image. The padding argument to GetImage can be used to expand
|
||||
* the image to include more foreground pixels. See GetImage below.
|
||||
*/
|
||||
bool BoundingBox(PageIteratorLevel level, int *left, int *top, int *right,
|
||||
int *bottom) const;
|
||||
bool BoundingBox(PageIteratorLevel level, int padding, int *left, int *top,
|
||||
int *right, int *bottom) const;
|
||||
/**
|
||||
* Returns the bounding rectangle of the object in a coordinate system of the
|
||||
* working image rectangle having its origin at (rect_left_, rect_top_) with
|
||||
* respect to the original image and is scaled by a factor scale_.
|
||||
*/
|
||||
bool BoundingBoxInternal(PageIteratorLevel level, int *left, int *top,
|
||||
int *right, int *bottom) const;
|
||||
|
||||
/** Returns whether there is no object of a given level. */
|
||||
bool Empty(PageIteratorLevel level) const;
|
||||
|
||||
/**
|
||||
* Returns the type of the current block.
|
||||
* See tesseract/publictypes.h for PolyBlockType.
|
||||
*/
|
||||
PolyBlockType BlockType() const;
|
||||
|
||||
/**
|
||||
* Returns the polygon outline of the current block. The returned Pta must
|
||||
* be ptaDestroy-ed after use. Note that the returned Pta lists the vertices
|
||||
* of the polygon, and the last edge is the line segment between the last
|
||||
* point and the first point. nullptr will be returned if the iterator is
|
||||
* at the end of the document or layout analysis was not used.
|
||||
*/
|
||||
Pta *BlockPolygon() const;
|
||||
|
||||
/**
|
||||
* Returns a binary image of the current object at the given level.
|
||||
* The position and size match the return from BoundingBoxInternal, and so
|
||||
* this could be upscaled with respect to the original input image.
|
||||
* Use pixDestroy to delete the image after use.
|
||||
*/
|
||||
Pix *GetBinaryImage(PageIteratorLevel level) const;
|
||||
|
||||
/**
|
||||
* Returns an image of the current object at the given level in greyscale
|
||||
* if available in the input. To guarantee a binary image use GetBinaryImage.
|
||||
* NOTE that in order to give the best possible image, the bounds are
|
||||
* expanded slightly over the binary connected component, by the supplied
|
||||
* padding, so the top-left position of the returned image is returned
|
||||
* in (left,top). These will most likely not match the coordinates
|
||||
* returned by BoundingBox.
|
||||
* If you do not supply an original image, you will get a binary one.
|
||||
* Use pixDestroy to delete the image after use.
|
||||
*/
|
||||
Pix *GetImage(PageIteratorLevel level, int padding, Pix *original_img,
|
||||
int *left, int *top) const;
|
||||
|
||||
/**
|
||||
* Returns the baseline of the current object at the given level.
|
||||
* The baseline is the line that passes through (x1, y1) and (x2, y2).
|
||||
* WARNING: with vertical text, baselines may be vertical!
|
||||
* Returns false if there is no baseline at the current position.
|
||||
*/
|
||||
bool Baseline(PageIteratorLevel level, int *x1, int *y1, int *x2,
|
||||
int *y2) const;
|
||||
|
||||
// Returns the attributes of the current row.
|
||||
void RowAttributes(float *row_height, float *descenders,
|
||||
float *ascenders) const;
|
||||
|
||||
/**
|
||||
* Returns orientation for the block the iterator points to.
|
||||
* orientation, writing_direction, textline_order: see publictypes.h
|
||||
* deskew_angle: after rotating the block so the text orientation is
|
||||
* upright, how many radians does one have to rotate the
|
||||
* block anti-clockwise for it to be level?
|
||||
* -Pi/4 <= deskew_angle <= Pi/4
|
||||
*/
|
||||
void Orientation(tesseract::Orientation *orientation,
|
||||
tesseract::WritingDirection *writing_direction,
|
||||
tesseract::TextlineOrder *textline_order,
|
||||
float *deskew_angle) const;
|
||||
|
||||
/**
|
||||
* Returns information about the current paragraph, if available.
|
||||
*
|
||||
* justification -
|
||||
* LEFT if ragged right, or fully justified and script is left-to-right.
|
||||
* RIGHT if ragged left, or fully justified and script is right-to-left.
|
||||
* unknown if it looks like source code or we have very few lines.
|
||||
* is_list_item -
|
||||
* true if we believe this is a member of an ordered or unordered list.
|
||||
* is_crown -
|
||||
* true if the first line of the paragraph is aligned with the other
|
||||
* lines of the paragraph even though subsequent paragraphs have first
|
||||
* line indents. This typically indicates that this is the continuation
|
||||
* of a previous paragraph or that it is the very first paragraph in
|
||||
* the chapter.
|
||||
* first_line_indent -
|
||||
* For LEFT aligned paragraphs, the first text line of paragraphs of
|
||||
* this kind are indented this many pixels from the left edge of the
|
||||
* rest of the paragraph.
|
||||
* for RIGHT aligned paragraphs, the first text line of paragraphs of
|
||||
* this kind are indented this many pixels from the right edge of the
|
||||
* rest of the paragraph.
|
||||
* NOTE 1: This value may be negative.
|
||||
* NOTE 2: if *is_crown == true, the first line of this paragraph is
|
||||
* actually flush, and first_line_indent is set to the "common"
|
||||
* first_line_indent for subsequent paragraphs in this block
|
||||
* of text.
|
||||
*/
|
||||
void ParagraphInfo(tesseract::ParagraphJustification *justification,
|
||||
bool *is_list_item, bool *is_crown,
|
||||
int *first_line_indent) const;
|
||||
|
||||
// If the current WERD_RES (it_->word()) is not nullptr, sets the BlamerBundle
|
||||
// of the current word to the given pointer (takes ownership of the pointer)
|
||||
// and returns true.
|
||||
// Can only be used when iterating on the word level.
|
||||
bool SetWordBlamerBundle(BlamerBundle *blamer_bundle);
|
||||
|
||||
protected:
|
||||
/**
|
||||
* Sets up the internal data for iterating the blobs of a new word, then
|
||||
* moves the iterator to the given offset.
|
||||
*/
|
||||
void BeginWord(int offset);
|
||||
|
||||
/** Pointer to the page_res owned by the API. */
|
||||
PAGE_RES *page_res_;
|
||||
/** Pointer to the Tesseract object owned by the API. */
|
||||
Tesseract *tesseract_;
|
||||
/**
|
||||
* The iterator to the page_res_. Owned by this ResultIterator.
|
||||
* A pointer just to avoid dragging in Tesseract includes.
|
||||
*/
|
||||
PAGE_RES_IT *it_;
|
||||
/**
|
||||
* The current input WERD being iterated. If there is an output from OCR,
|
||||
* then word_ is nullptr. Owned by the API
|
||||
*/
|
||||
WERD *word_;
|
||||
/** The length of the current word_. */
|
||||
int word_length_;
|
||||
/** The current blob index within the word. */
|
||||
int blob_index_;
|
||||
/**
|
||||
* Iterator to the blobs within the word. If nullptr, then we are iterating
|
||||
* OCR results in the box_word.
|
||||
* Owned by this ResultIterator.
|
||||
*/
|
||||
C_BLOB_IT *cblob_it_;
|
||||
/** Control over what to include in bounding boxes. */
|
||||
bool include_upper_dots_;
|
||||
bool include_lower_dots_;
|
||||
/** Parameters saved from the Thresholder. Needed to rebuild coordinates.*/
|
||||
int scale_;
|
||||
int scaled_yres_;
|
||||
int rect_left_;
|
||||
int rect_top_;
|
||||
int rect_width_;
|
||||
int rect_height_;
|
||||
};
|
||||
|
||||
} // namespace tesseract.
|
||||
|
||||
#endif // TESSERACT_CCMAIN_PAGEITERATOR_H_
|
|
@ -1,281 +0,0 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: publictypes.h
|
||||
// Description: Types used in both the API and internally
|
||||
// Author: Ray Smith
|
||||
//
|
||||
// (C) Copyright 2010, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef TESSERACT_CCSTRUCT_PUBLICTYPES_H_
|
||||
#define TESSERACT_CCSTRUCT_PUBLICTYPES_H_
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// This file contains types that are used both by the API and internally
|
||||
// to Tesseract. In order to decouple the API from Tesseract and prevent cyclic
|
||||
// dependencies, THIS FILE SHOULD NOT DEPEND ON ANY OTHER PART OF TESSERACT.
|
||||
// Restated: It is OK for low-level Tesseract files to include publictypes.h,
|
||||
// but not for the low-level tesseract code to include top-level API code.
|
||||
// This file should not use other Tesseract types, as that would drag
|
||||
// their includes into the API-level.
|
||||
|
||||
/** Number of printers' points in an inch. The unit of the pointsize return. */
constexpr int kPointsPerInch = 72;

/**
 * Minimum believable resolution. Used as a default if there is no other
 * information, as it is safer to under-estimate than over-estimate.
 */
constexpr int kMinCredibleResolution = 70;

/** Maximum believable resolution. */
constexpr int kMaxCredibleResolution = 2400;

/**
 * Ratio between median blob size and likely resolution. Used to estimate
 * resolution when none is provided. This is basically 1/usual text size in
 * inches.
 */
constexpr int kResolutionEstimationFactor = 10;
|
||||
|
||||
/**
 * Possible types for a POLY_BLOCK or ColPartition.
 * Must be kept in sync with kPBColors in polyblk.cpp and PTIs*Type functions
 * below, as well as kPolyBlockNames in layout_test.cc.
 * Used extensively by ColPartition, and POLY_BLOCK.
 */
enum PolyBlockType {
  PT_UNKNOWN,         // Type is not yet known. Keep as the first element.
  PT_FLOWING_TEXT,    // Text that lives inside a column.
  PT_HEADING_TEXT,    // Text that spans more than one column.
  PT_PULLOUT_TEXT,    // Text that is in a cross-column pull-out region.
  PT_EQUATION,        // Partition belonging to an equation region.
  PT_INLINE_EQUATION, // Partition has inline equation.
  PT_TABLE,           // Partition belonging to a table region.
  PT_VERTICAL_TEXT,   // Text-line runs vertically.
  PT_CAPTION_TEXT,    // Text that belongs to an image.
  PT_FLOWING_IMAGE,   // Image that lives inside a column.
  PT_HEADING_IMAGE,   // Image that spans more than one column.
  PT_PULLOUT_IMAGE,   // Image that is in a cross-column pull-out region.
  PT_HORZ_LINE,       // Horizontal Line.
  PT_VERT_LINE,       // Vertical Line.
  PT_NOISE,           // Lies outside of any column.
  PT_COUNT            // Number of enumerators above (last element).
};

/**
 * Returns true if PolyBlockType is a line type, i.e. either a horizontal
 * line (PT_HORZ_LINE) or a vertical line (PT_VERT_LINE).
 */
constexpr bool PTIsLineType(PolyBlockType type) {
  return type == PT_HORZ_LINE || type == PT_VERT_LINE;
}

/**
 * Returns true if PolyBlockType is of image type: flowing, heading or
 * pull-out image.
 */
constexpr bool PTIsImageType(PolyBlockType type) {
  return type == PT_FLOWING_IMAGE || type == PT_HEADING_IMAGE ||
         type == PT_PULLOUT_IMAGE;
}

/**
 * Returns true if PolyBlockType is of text type. Note that PT_TABLE and
 * PT_INLINE_EQUATION count as text here, while PT_EQUATION does not.
 */
constexpr bool PTIsTextType(PolyBlockType type) {
  return type == PT_FLOWING_TEXT || type == PT_HEADING_TEXT ||
         type == PT_PULLOUT_TEXT || type == PT_TABLE ||
         type == PT_VERTICAL_TEXT || type == PT_CAPTION_TEXT ||
         type == PT_INLINE_EQUATION;
}

/** Returns true if PolyBlockType is of pullout (inter-column) type. */
constexpr bool PTIsPulloutType(PolyBlockType type) {
  return type == PT_PULLOUT_IMAGE || type == PT_PULLOUT_TEXT;
}
|
||||
|
||||
/**
 * +------------------+  Orientation Example:
 * | 1 Aaaa Aaaa Aaaa |  ====================
 * | Aaa aa aaa aa    |  To left is a diagram of some (1) English and
 * | aaaaaa A aa aaa. |  (2) Chinese text and a (3) photo credit.
 * |                2 |
 * |   #######  c c C |  Upright Latin characters are represented as A and a.
 * |   #######  c c c |  '<' represents a latin character rotated
 * | < #######  c c c |      anti-clockwise 90 degrees.
 * | < #######  c   c |
 * | < #######  .   c |  Upright Chinese characters are represented C and c.
 * | 3 #######      c |
 * +------------------+  NOTA BENE: enum values here should match goodoc.proto
 *
 * If you orient your head so that "up" aligns with Orientation,
 * then the characters will appear "right side up" and readable.
 *
 * In the example above, both the English and Chinese paragraphs are oriented
 * so their "up" is the top of the page (page up). The photo credit is read
 * with one's head turned leftward ("up" is to page left).
 *
 * The values of this enum match the convention of Tesseract's osdetect.h
 */
enum Orientation {
  ORIENTATION_PAGE_UP = 0,
  ORIENTATION_PAGE_RIGHT = 1,
  ORIENTATION_PAGE_DOWN = 2,
  ORIENTATION_PAGE_LEFT = 3,
};
|
||||
|
||||
/**
 * The grapheme clusters within a line of text are laid out logically
 * in this direction, judged when looking at the text line rotated so that
 * its Orientation is "page up".
 *
 * For English text, the writing direction is left-to-right. For the
 * Chinese text in the above example, the writing direction is top-to-bottom.
 */
enum WritingDirection {
  WRITING_DIRECTION_LEFT_TO_RIGHT = 0,
  WRITING_DIRECTION_RIGHT_TO_LEFT = 1,
  WRITING_DIRECTION_TOP_TO_BOTTOM = 2,
};
|
||||
|
||||
/**
 * The text lines are read in the given sequence.
 *
 * In English, the order is top-to-bottom.
 * In Chinese, vertical text lines are read right-to-left. Mongolian is
 * written in vertical columns top to bottom like Chinese, but the lines
 * are ordered left-to-right.
 *
 * Note that only some combinations make sense. For example,
 * WRITING_DIRECTION_LEFT_TO_RIGHT implies TEXTLINE_ORDER_TOP_TO_BOTTOM
 */
enum TextlineOrder {
  TEXTLINE_ORDER_LEFT_TO_RIGHT = 0,
  TEXTLINE_ORDER_RIGHT_TO_LEFT = 1,
  TEXTLINE_ORDER_TOP_TO_BOTTOM = 2,
};
|
||||
|
||||
/**
 * Possible modes for page layout analysis. These *must* be kept in order
 * of decreasing amount of layout analysis to be done, except for OSD_ONLY,
 * so that the inequality test functions below work.
 */
enum PageSegMode {
  PSM_OSD_ONLY = 0,      ///< Orientation and script detection only.
  PSM_AUTO_OSD = 1,      ///< Automatic page segmentation with orientation and
                         ///< script detection. (OSD)
  PSM_AUTO_ONLY = 2,     ///< Automatic page segmentation, but no OSD, or OCR.
  PSM_AUTO = 3,          ///< Fully automatic page segmentation, but no OSD.
  PSM_SINGLE_COLUMN = 4, ///< Assume a single column of text of variable sizes.
  PSM_SINGLE_BLOCK_VERT_TEXT = 5, ///< Assume a single uniform block of
                                  ///< vertically aligned text.
  PSM_SINGLE_BLOCK = 6, ///< Assume a single uniform block of text. (Default.)
  PSM_SINGLE_LINE = 7,  ///< Treat the image as a single text line.
  PSM_SINGLE_WORD = 8,  ///< Treat the image as a single word.
  PSM_CIRCLE_WORD = 9,  ///< Treat the image as a single word in a circle.
  PSM_SINGLE_CHAR = 10, ///< Treat the image as a single character.
  PSM_SPARSE_TEXT =
      11, ///< Find as much text as possible in no particular order.
  PSM_SPARSE_TEXT_OSD = 12, ///< Sparse text with orientation and script det.
  PSM_RAW_LINE = 13, ///< Treat the image as a single text line, bypassing
                     ///< hacks that are Tesseract-specific.

  PSM_COUNT ///< Number of enum entries.
};

/**
 * Inline functions that act on a PageSegMode to determine whether components of
 * layout analysis are enabled.
 * *Depend critically on the order of elements of PageSegMode.*
 * NOTE that arg is an int for compatibility with INT_PARAM.
 */

/** True for PSM_OSD_ONLY, PSM_AUTO_OSD and PSM_SPARSE_TEXT_OSD. */
constexpr bool PSM_OSD_ENABLED(int pageseg_mode) {
  return pageseg_mode <= PSM_AUTO_OSD || pageseg_mode == PSM_SPARSE_TEXT_OSD;
}

/** True for every mode up to and including PSM_AUTO, and for PSM_SPARSE_TEXT_OSD. */
constexpr bool PSM_ORIENTATION_ENABLED(int pageseg_mode) {
  return pageseg_mode <= PSM_AUTO || pageseg_mode == PSM_SPARSE_TEXT_OSD;
}

/** True for modes in the closed range [PSM_AUTO_OSD, PSM_AUTO]. */
constexpr bool PSM_COL_FIND_ENABLED(int pageseg_mode) {
  return pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_AUTO;
}

/** True for the two sparse-text modes. */
constexpr bool PSM_SPARSE(int pageseg_mode) {
  return pageseg_mode == PSM_SPARSE_TEXT || pageseg_mode == PSM_SPARSE_TEXT_OSD;
}

/** True for modes in the closed range [PSM_AUTO_OSD, PSM_SINGLE_COLUMN]. */
constexpr bool PSM_BLOCK_FIND_ENABLED(int pageseg_mode) {
  return pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_SINGLE_COLUMN;
}

/** True for modes in the closed range [PSM_AUTO_OSD, PSM_SINGLE_BLOCK]. */
constexpr bool PSM_LINE_FIND_ENABLED(int pageseg_mode) {
  return pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_SINGLE_BLOCK;
}

/**
 * True for modes in the closed range [PSM_AUTO_OSD, PSM_SINGLE_LINE] and for
 * the two sparse-text modes.
 */
constexpr bool PSM_WORD_FIND_ENABLED(int pageseg_mode) {
  return (pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_SINGLE_LINE) ||
         pageseg_mode == PSM_SPARSE_TEXT || pageseg_mode == PSM_SPARSE_TEXT_OSD;
}
|
||||
|
||||
/**
 * enum of the elements of the page hierarchy, used in ResultIterator
 * to provide functions that operate on each level without having to
 * have 5x as many functions.
 * Enumerators are listed from the coarsest level (block) to the finest
 * (symbol).
 */
enum PageIteratorLevel {
  RIL_BLOCK,    // Block of text/image/separator line.
  RIL_PARA,     // Paragraph within a block.
  RIL_TEXTLINE, // Line within a paragraph.
  RIL_WORD,     // Word within a textline.
  RIL_SYMBOL    // Symbol/character within a word.
};
|
||||
|
||||
/**
 * JUSTIFICATION_UNKNOWN
 *   The alignment is not clearly one of the other options. This could happen
 *   for example if there are only one or two lines of text or the text looks
 *   like source code or poetry.
 *
 * NOTA BENE: Fully justified paragraphs (text aligned to both left and right
 *   margins) are marked by Tesseract with JUSTIFICATION_LEFT if their text
 *   is written with a left-to-right script and with JUSTIFICATION_RIGHT if
 *   their text is written in a right-to-left script.
 *
 * Interpretation for text read in vertical lines:
 *   "Left" is wherever the starting reading position is.
 *
 * JUSTIFICATION_LEFT
 *   Each line, except possibly the first, is flush to the same left tab stop.
 *
 * JUSTIFICATION_CENTER
 *   The text lines of the paragraph are centered about a line going
 *   down through their middle of the text lines.
 *
 * JUSTIFICATION_RIGHT
 *   Each line, except possibly the first, is flush to the same right tab stop.
 */
enum ParagraphJustification {
  JUSTIFICATION_UNKNOWN,
  JUSTIFICATION_LEFT,
  JUSTIFICATION_CENTER,
  JUSTIFICATION_RIGHT,
};
|
||||
|
||||
/**
 * When Tesseract/Cube is initialized we can choose to instantiate/load/run
 * only the Tesseract part, only the Cube part or both along with the combiner.
 * The preference of which engine to use is stored in tessedit_ocr_engine_mode.
 *
 * ATTENTION: When modifying this enum, please make sure to make the
 * appropriate changes to all the enums mirroring it (e.g. OCREngine in
 * cityblock/workflow/detection/detection_storage.proto). Such enums will
 * mention the connection to OcrEngineMode in the comments.
 */
enum OcrEngineMode {
  OEM_TESSERACT_ONLY,          // Run Tesseract only - fastest; deprecated
  OEM_LSTM_ONLY,               // Run just the LSTM line recognizer.
  OEM_TESSERACT_LSTM_COMBINED, // Run the LSTM recognizer, but allow fallback
                               // to Tesseract when things get difficult.
                               // deprecated
  OEM_DEFAULT,                 // Specify this mode when calling init_*(),
                               // to indicate that any of the above modes
                               // should be automatically inferred from the
                               // variables in the language-specific config,
                               // command-line configs, or if not specified
                               // in any of the above should be set to the
                               // default OEM_TESSERACT_ONLY.
  OEM_COUNT                    // Number of OEMs
};
|
||||
|
||||
} // namespace tesseract.
|
||||
|
||||
#endif // TESSERACT_CCSTRUCT_PUBLICTYPES_H_
|
|
@ -1,311 +0,0 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: renderer.h
|
||||
// Description: Rendering interface to inject into TessBaseAPI
|
||||
//
|
||||
// (C) Copyright 2011, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef TESSERACT_API_RENDERER_H_
|
||||
#define TESSERACT_API_RENDERER_H_
|
||||
|
||||
#include "export.h"
|
||||
|
||||
// To avoid collision with other typenames include the ABSOLUTE MINIMUM
|
||||
// complexity of includes here. Use forward declarations wherever possible
|
||||
// and hide includes of complex types in baseapi.cpp.
|
||||
#include <cstdint>
|
||||
#include <string> // for std::string
|
||||
#include <vector> // for std::vector
|
||||
|
||||
struct Pix;
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
class TessBaseAPI;
|
||||
|
||||
/**
|
||||
* Interface for rendering tesseract results into a document, such as text,
|
||||
* HOCR or pdf. This class is abstract. Specific classes handle individual
|
||||
* formats. This interface is then used to inject the renderer class into
|
||||
* tesseract when processing images.
|
||||
*
|
||||
* For simplicity implementing this with tesseract version 3.01,
|
||||
* the renderer contains document state that is cleared from document
|
||||
* to document just as the TessBaseAPI is. This way the base API can just
|
||||
* delegate its rendering functionality to injected renderers, and the
|
||||
* renderers can manage the associated state needed for the specific formats
|
||||
* in addition to the heuristics for producing it.
|
||||
*/
|
||||
class TESS_API TessResultRenderer {
|
||||
public:
|
||||
virtual ~TessResultRenderer();
|
||||
|
||||
// Takes ownership of pointer so must be new'd instance.
|
||||
// Renderers aren't ordered, but appends the sequences of next parameter
|
||||
// and existing next(). The renderers should be unique across both lists.
|
||||
void insert(TessResultRenderer *next);
|
||||
|
||||
// Returns the next renderer or nullptr.
|
||||
TessResultRenderer *next() {
|
||||
return next_;
|
||||
}
|
||||
|
||||
/**
|
||||
* Starts a new document with the given title.
|
||||
* This clears the contents of the output data.
|
||||
* Title should use UTF-8 encoding.
|
||||
*/
|
||||
bool BeginDocument(const char *title);
|
||||
|
||||
/**
|
||||
* Adds the recognized text from the source image to the current document.
|
||||
* Invalid if BeginDocument not yet called.
|
||||
*
|
||||
* Note that this API is a bit weird but is designed to fit into the
|
||||
* current TessBaseAPI implementation where the api has lots of state
|
||||
* information that we might want to add in.
|
||||
*/
|
||||
bool AddImage(TessBaseAPI *api);
|
||||
|
||||
/**
|
||||
* Finishes the document and finalizes the output data
|
||||
* Invalid if BeginDocument not yet called.
|
||||
*/
|
||||
bool EndDocument();
|
||||
|
||||
const char *file_extension() const {
|
||||
return file_extension_;
|
||||
}
|
||||
const char *title() const {
|
||||
return title_.c_str();
|
||||
}
|
||||
|
||||
// Is everything fine? Otherwise something went wrong.
|
||||
bool happy() const {
|
||||
return happy_;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the index of the last image given to AddImage
|
||||
* (i.e. images are incremented whether the image succeeded or not)
|
||||
*
|
||||
* This is always defined. It means either the number of the
|
||||
* current image, the last image ended, or in the completed document
|
||||
* depending on when in the document lifecycle you are looking at it.
|
||||
* Will return -1 if a document was never started.
|
||||
*/
|
||||
int imagenum() const {
|
||||
return imagenum_;
|
||||
}
|
||||
|
||||
protected:
|
||||
/**
|
||||
* Called by concrete classes.
|
||||
*
|
||||
* outputbase is the name of the output file excluding
|
||||
* extension. For example, "/path/to/chocolate-chip-cookie-recipe"
|
||||
*
|
||||
* extension indicates the file extension to be used for output
|
||||
* files. For example "pdf" will produce a .pdf file, and "hocr"
|
||||
* will produce .hocr files.
|
||||
*/
|
||||
TessResultRenderer(const char *outputbase, const char *extension);
|
||||
|
||||
// Hook for specialized handling in BeginDocument()
|
||||
virtual bool BeginDocumentHandler();
|
||||
|
||||
// This must be overridden to render the OCR'd results
|
||||
virtual bool AddImageHandler(TessBaseAPI *api) = 0;
|
||||
|
||||
// Hook for specialized handling in EndDocument()
|
||||
virtual bool EndDocumentHandler();
|
||||
|
||||
// Renderers can call this to append '\0' terminated strings into
|
||||
// the output string returned by GetOutput.
|
||||
// This method will grow the output buffer if needed.
|
||||
void AppendString(const char *s);
|
||||
|
||||
// Renderers can call this to append binary byte sequences into
|
||||
// the output string returned by GetOutput. Note that s is not necessarily
|
||||
// '\0' terminated (and can contain '\0' within it).
|
||||
// This method will grow the output buffer if needed.
|
||||
void AppendData(const char *s, int len);
|
||||
|
||||
private:
|
||||
TessResultRenderer *next_; // Can link multiple renderers together
|
||||
FILE *fout_; // output file pointer
|
||||
const char *file_extension_; // standard extension for generated output
|
||||
std::string title_; // title of document being rendered
|
||||
int imagenum_; // index of last image added
|
||||
bool happy_; // I get grumpy when the disk fills up, etc.
|
||||
};
|
||||
|
||||
/**
|
||||
* Renders tesseract output into a plain UTF-8 text string
|
||||
*/
|
||||
class TESS_API TessTextRenderer : public TessResultRenderer {
|
||||
public:
|
||||
explicit TessTextRenderer(const char *outputbase);
|
||||
|
||||
protected:
|
||||
bool AddImageHandler(TessBaseAPI *api) override;
|
||||
};
|
||||
|
||||
/**
|
||||
* Renders tesseract output into an hocr text string
|
||||
*/
|
||||
class TESS_API TessHOcrRenderer : public TessResultRenderer {
|
||||
public:
|
||||
explicit TessHOcrRenderer(const char *outputbase, bool font_info);
|
||||
explicit TessHOcrRenderer(const char *outputbase);
|
||||
|
||||
protected:
|
||||
bool BeginDocumentHandler() override;
|
||||
bool AddImageHandler(TessBaseAPI *api) override;
|
||||
bool EndDocumentHandler() override;
|
||||
|
||||
private:
|
||||
bool font_info_; // whether to print font information
|
||||
};
|
||||
|
||||
/**
|
||||
* Renders tesseract output into an alto text string
|
||||
*/
|
||||
class TESS_API TessAltoRenderer : public TessResultRenderer {
|
||||
public:
|
||||
explicit TessAltoRenderer(const char *outputbase);
|
||||
|
||||
protected:
|
||||
bool BeginDocumentHandler() override;
|
||||
bool AddImageHandler(TessBaseAPI *api) override;
|
||||
bool EndDocumentHandler() override;
|
||||
|
||||
private:
|
||||
bool begin_document;
|
||||
};
|
||||
|
||||
/**
|
||||
* Renders Tesseract output into a TSV string
|
||||
*/
|
||||
class TESS_API TessTsvRenderer : public TessResultRenderer {
|
||||
public:
|
||||
explicit TessTsvRenderer(const char *outputbase, bool font_info);
|
||||
explicit TessTsvRenderer(const char *outputbase);
|
||||
|
||||
protected:
|
||||
bool BeginDocumentHandler() override;
|
||||
bool AddImageHandler(TessBaseAPI *api) override;
|
||||
bool EndDocumentHandler() override;
|
||||
|
||||
private:
|
||||
bool font_info_; // whether to print font information
|
||||
};
|
||||
|
||||
/**
|
||||
* Renders tesseract output into searchable PDF
|
||||
*/
|
||||
class TESS_API TessPDFRenderer : public TessResultRenderer {
|
||||
public:
|
||||
// datadir is the location of the TESSDATA. We need it because
|
||||
// we load a custom PDF font from this location.
|
||||
TessPDFRenderer(const char *outputbase, const char *datadir,
|
||||
bool textonly = false);
|
||||
|
||||
protected:
|
||||
bool BeginDocumentHandler() override;
|
||||
bool AddImageHandler(TessBaseAPI *api) override;
|
||||
bool EndDocumentHandler() override;
|
||||
|
||||
private:
|
||||
// We don't want to have every image in memory at once,
|
||||
// so we store some metadata as we go along producing
|
||||
// PDFs one page at a time. At the end, that metadata is
|
||||
// used to make everything that isn't easily handled in a
|
||||
// streaming fashion.
|
||||
long int obj_; // counter for PDF objects
|
||||
std::vector<uint64_t> offsets_; // offset of every PDF object in bytes
|
||||
std::vector<long int> pages_; // object number for every /Page object
|
||||
std::string datadir_; // where to find the custom font
|
||||
bool textonly_; // skip images if set
|
||||
// Bookkeeping only. DIY = Do It Yourself.
|
||||
void AppendPDFObjectDIY(size_t objectsize);
|
||||
// Bookkeeping + emit data.
|
||||
void AppendPDFObject(const char *data);
|
||||
// Create the /Contents object for an entire page.
|
||||
char *GetPDFTextObjects(TessBaseAPI *api, double width, double height);
|
||||
// Turn an image into a PDF object. Only transcode if we have to.
|
||||
static bool imageToPDFObj(Pix *pix, const char *filename, long int objnum,
|
||||
char **pdf_object, long int *pdf_object_size,
|
||||
int jpg_quality);
|
||||
};
|
||||
|
||||
/**
|
||||
* Renders tesseract output into a plain UTF-8 text string
|
||||
*/
|
||||
class TESS_API TessUnlvRenderer : public TessResultRenderer {
|
||||
public:
|
||||
explicit TessUnlvRenderer(const char *outputbase);
|
||||
|
||||
protected:
|
||||
bool AddImageHandler(TessBaseAPI *api) override;
|
||||
};
|
||||
|
||||
/**
|
||||
* Renders tesseract output into a plain UTF-8 text string for LSTMBox
|
||||
*/
|
||||
class TESS_API TessLSTMBoxRenderer : public TessResultRenderer {
|
||||
public:
|
||||
explicit TessLSTMBoxRenderer(const char *outputbase);
|
||||
|
||||
protected:
|
||||
bool AddImageHandler(TessBaseAPI *api) override;
|
||||
};
|
||||
|
||||
/**
|
||||
* Renders tesseract output into a plain UTF-8 text string
|
||||
*/
|
||||
class TESS_API TessBoxTextRenderer : public TessResultRenderer {
|
||||
public:
|
||||
explicit TessBoxTextRenderer(const char *outputbase);
|
||||
|
||||
protected:
|
||||
bool AddImageHandler(TessBaseAPI *api) override;
|
||||
};
|
||||
|
||||
/**
|
||||
* Renders tesseract output into a plain UTF-8 text string in WordStr format
|
||||
*/
|
||||
class TESS_API TessWordStrBoxRenderer : public TessResultRenderer {
|
||||
public:
|
||||
explicit TessWordStrBoxRenderer(const char *outputbase);
|
||||
|
||||
protected:
|
||||
bool AddImageHandler(TessBaseAPI *api) override;
|
||||
};
|
||||
|
||||
#ifndef DISABLED_LEGACY_ENGINE
|
||||
|
||||
/**
|
||||
* Renders tesseract output into an osd text string
|
||||
*/
|
||||
class TESS_API TessOsdRenderer : public TessResultRenderer {
|
||||
public:
|
||||
explicit TessOsdRenderer(const char *outputbase);
|
||||
|
||||
protected:
|
||||
bool AddImageHandler(TessBaseAPI *api) override;
|
||||
};
|
||||
|
||||
#endif // ndef DISABLED_LEGACY_ENGINE
|
||||
|
||||
} // namespace tesseract.
|
||||
|
||||
#endif // TESSERACT_API_RENDERER_H_
|
|
@ -1,250 +0,0 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: resultiterator.h
|
||||
// Description: Iterator for tesseract results that is capable of
|
||||
// iterating in proper reading order over Bi Directional
|
||||
// (e.g. mixed Hebrew and English) text.
|
||||
// Author: David Eger
|
||||
//
|
||||
// (C) Copyright 2011, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef TESSERACT_CCMAIN_RESULT_ITERATOR_H_
|
||||
#define TESSERACT_CCMAIN_RESULT_ITERATOR_H_
|
||||
|
||||
#include "export.h" // for TESS_API, TESS_LOCAL
|
||||
#include "ltrresultiterator.h" // for LTRResultIterator
|
||||
#include "publictypes.h" // for PageIteratorLevel
|
||||
#include "unichar.h" // for StrongScriptDirection
|
||||
|
||||
#include <set> // for std::pair
|
||||
#include <vector> // for std::vector
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
/**
 * Iterator over recognition results that extends LTRResultIterator with
 * reading-order (BiDi-aware) navigation: the overrides below speak of the
 * "logical" beginning/end of an element rather than its left-to-right,
 * top-to-bottom position.
 *
 * Instances are copy constructible; the compiler-generated copy constructor
 * is sufficient.
 */
class TESS_API ResultIterator : public LTRResultIterator {
public:
  /**
   * Factory for a ResultIterator built from resit.
   * NOTE(review): the name and the constructor's note suggest the result is
   * positioned at the start of resit's paragraph; ownership of the returned
   * pointer is not stated in this header -- confirm in the implementation.
   */
  static ResultIterator *StartOfParagraph(const LTRResultIterator &resit);

  ~ResultIterator() override = default;

  // ---------------- Navigation within the page ----------------

  /** Repositions the iterator at the start of the page, ready to iterate. */
  void Begin() override;

  /**
   * Advances to the start of the next object at the requested level of the
   * page hierarchy, following the appropriate reading order.
   *
   * @return false once the end of the page has been reached.
   *
   * RIL_SYMBOL skips non-text blocks; every other PageIteratorLevel visits
   * each non-text block exactly once (treat such a block as one paragraph
   * holding one line holding one imaginary word). Calls with different
   * levels can be mixed freely. Words of right-to-left scripts are iterated
   * correctly provided the matching language has been loaded into Tesseract.
   */
  bool Next(PageIteratorLevel level) override;

  /**
   * Reports whether the iterator sits at the *logical* beginning of the
   * given level (rather than the left-to-right, top-to-bottom beginning).
   * In every other respect it behaves like PageIterator::IsAtBeginningOf();
   * see pageiterator.h for the full description.
   */
  bool IsAtBeginningOf(PageIteratorLevel level) const override;

  /**
   * BiDi-correct version of PageIterator::IsAtFinalElement.
   * Example: IsAtFinalElement(RIL_PARA, RIL_WORD) tells whether the current
   * word is the last word of its paragraph. See PageIterator for details.
   */
  bool IsAtFinalElement(PageIteratorLevel level,
                        PageIteratorLevel element) const override;

  // ---------------- Word-level queries ----------------

  /** Number of blanks that precede the current word. */
  int BlanksBeforeWord() const;

  // ---------------- Data access ----------------

  /**
   * Text of the current object at the given level as a null-terminated,
   * UTF-8 encoded string. The caller must release it with delete [].
   */
  virtual char *GetUTF8Text(PageIteratorLevel level) const;

  /** LSTM choices for each LSTM timestep of the current word. */
  virtual std::vector<std::vector<std::vector<std::pair<const char *, float>>>>
      *GetRawLSTMTimesteps() const;
  virtual std::vector<std::vector<std::pair<const char *, float>>>
      *GetBestLSTMSymbolChoices() const;

  /**
   * True when the dominant reading direction of the current paragraph is
   * left-to-right, false when it is right-to-left.
   */
  bool ParagraphIsLtr() const;

  // ---------------- Exposed for testing only ----------------

  /**
   * Produces the reading order for a line of words (supplied left-to-right)
   * as a sequence of word indices interleaved with optional meta-marks.
   * Meta-marks are negative values:
   *   kMinorRunStart  start of minor-direction text.
   *   kMinorRunEnd    end of minor-direction text.
   *   kComplexWord    the next indexed word mixes left-to-right and
   *                   right-to-left characters and was treated as neutral.
   *
   * With five words indexed [0,1,2,3,4] from the left edge of the line,
   * all of the following are plausible reading orders:
   *
   *   Left-to-Right text in an ltr paragraph:
   *     { 0, 1, 2, 3, 4 }
   *   Left-to-Right text in an rtl paragraph:
   *     { kMinorRunStart, 0, 1, 2, 3, 4, kMinorRunEnd }
   *   Right-to-Left text in an rtl paragraph:
   *     { 4, 3, 2, 1, 0 }
   *   Left-to-Right with an RTL phrase in words 2, 3, in an ltr paragraph:
   *     { 0, 1, kMinorRunStart, 3, 2, kMinorRunEnd, 4 }
   */
  static void CalculateTextlineOrder(
      bool paragraph_is_ltr,
      const std::vector<StrongScriptDirection> &word_dirs,
      std::vector<int> *reading_order);

  static const int kMinorRunStart;
  static const int kMinorRunEnd;
  static const int kComplexWord;

protected:
  /**
   * The data referenced by resit is presumed to outlive this object.
   * The constructor is deliberately not public because of a non-obvious
   * effect: the new iterator is reset to the beginning of the paragraph
   * rather than kept at the position resit pointed to.
   */
  explicit ResultIterator(const LTRResultIterator &resit);

private:
  /**
   * Computes the dominant writing direction of the current paragraph.
   * Members should normally read current_paragraph_is_ltr_ instead.
   */
  bool CurrentParagraphIsLtr() const;

  /**
   * Given an iterator positioned somewhere inside a text line, yields the
   * reading order of the words of that line as indices counted from
   * resit->RestartRow() (= index 0).
   * Besides non-negative word indices the output may contain the negative
   * marks:
   *   kMinorRunStart  start of minor-direction text.
   *   kMinorRunEnd    end of minor-direction text.
   *   kComplexWord    the previous word mixes left-to-right and
   *                   right-to-left characters and was treated as neutral.
   */
  void CalculateTextlineOrder(bool paragraph_is_ltr,
                              const LTRResultIterator &resit,
                              std::vector<int> *indices) const;

  /** As above; additionally fills the caller's ssd when ssd != nullptr. */
  void CalculateTextlineOrder(bool paragraph_is_ltr,
                              const LTRResultIterator &resit,
                              std::vector<StrongScriptDirection> *ssd,
                              std::vector<int> *indices) const;

  /**
   * Index of the current word when the row is read strictly
   * left-to-right.
   */
  int LTRWordIndex() const;

  /**
   * For an iterator positioned on a word, yields the blob indices of that
   * word in logical reading order.
   */
  void CalculateBlobOrder(std::vector<int> *blob_indices) const;

  /** Requires current_paragraph_is_ltr_ to have been set. */
  void MoveToLogicalStartOfTextline();

  /**
   * Requires both current_paragraph_is_ltr_ and in_minor_direction_ to
   * have been set.
   */
  void MoveToLogicalStartOfWord();

  /** True if positioned on the last symbol (in reading order) of the word. */
  bool IsAtFinalSymbolOfWord() const;

  /** True if positioned on the first symbol (in reading order) of the word. */
  bool IsAtFirstSymbolOfWord() const;

  /**
   * Appends whatever extra marks belong after this word when it is printed;
   * these are mostly Unicode BiDi control characters.
   */
  void AppendSuffixMarks(std::string *text) const;

  /** Appends the current word, in reading order, to the given buffer. */
  void AppendUTF8WordText(std::string *text) const;

  /**
   * Appends the text of the current text line. The iterator MUST be at the
   * beginning of that line; on return it points at the first position after
   * the line. Every text line ends with one newline character, and a line
   * that closes a paragraph receives a second one.
   */
  void IterateAndAppendUTF8TextlineText(std::string *text);

  /**
   * Appends the current paragraph's text, in reading order, to the given
   * buffer. Each text line ends with one newline and the paragraph as a
   * whole gets one additional trailing newline.
   */
  void AppendUTF8ParagraphText(std::string *text) const;

  /** True when the bidi_debug flag is at least min_level. */
  bool BidiDebug(int min_level) const;

  bool current_paragraph_is_ltr_;

  /**
   * Whether the character currently pointed at starts a minor-direction
   * run.
   */
  bool at_beginning_of_minor_run_;

  /** Whether the character currently pointed at lies in a minor-direction run. */
  bool in_minor_direction_;

  /**
   * Whether detected inter-word spaces are kept as-is, or collapsed to a
   * single space character (the default behaviour).
   */
  bool preserve_interword_spaces_;
};
|
||||
|
||||
} // namespace tesseract.
|
||||
|
||||
#endif // TESSERACT_CCMAIN_RESULT_ITERATOR_H_
|
|
@ -1,174 +0,0 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: unichar.h
|
||||
// Description: Unicode character/ligature class.
|
||||
// Author: Ray Smith
|
||||
//
|
||||
// (C) Copyright 2006, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef TESSERACT_CCUTIL_UNICHAR_H_
|
||||
#define TESSERACT_CCUTIL_UNICHAR_H_
|
||||
|
||||
#include "export.h"
|
||||
|
||||
#include <memory.h>
|
||||
#include <cstring>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Size in bytes of the fixed buffer held by a UNICHAR (see the `chars`
// member of class UNICHAR). Must be
|
||||
// at least 4. Must not exceed 31 without changing the coding of length.
|
||||
#define UNICHAR_LEN 30
|
||||
|
||||
// A UNICHAR_ID is the unique integer id of a unichar.
|
||||
using UNICHAR_ID = int;
|
||||
|
||||
// Sentinel value indicating an invalid or uninitialized unichar id.
|
||||
static const int INVALID_UNICHAR_ID = -1;
|
||||
// The special unichar string that corresponds to INVALID_UNICHAR_ID.
|
||||
static const char INVALID_UNICHAR[] = "__INVALID_UNICHAR__";
|
||||
|
||||
// Classification of a piece of text by the directionality of the
// characters it contains.
enum StrongScriptDirection {
|
||||
DIR_NEUTRAL = 0, // Text contains only neutral characters.
|
||||
DIR_LEFT_TO_RIGHT = 1, // Text contains no Right-to-Left characters.
|
||||
DIR_RIGHT_TO_LEFT = 2, // Text contains no Left-to-Right characters.
|
||||
DIR_MIX = 3, // Text contains a mixture of left-to-right
|
||||
// and right-to-left characters.
|
||||
};
|
||||
|
||||
// Element type of the code-point vectors exchanged by
// UNICHAR::UTF8ToUTF32 and UNICHAR::UTF32ToUTF8.
using char32 = signed int;
|
||||
|
||||
// The UNICHAR class holds a single classification result. This may be
|
||||
// a single Unicode character (stored as between 1 and 4 utf8 bytes) or
|
||||
// multiple Unicode characters representing the NFKC expansion of a ligature
|
||||
// such as fi, ffl etc. These are also stored as utf8.
|
||||
class TESS_API UNICHAR {
|
||||
public:
|
||||
UNICHAR() {
|
||||
memset(chars, 0, UNICHAR_LEN);
|
||||
}
|
||||
|
||||
// Construct from a utf8 string. If len<0 then the string is null terminated.
|
||||
// If the string is too long to fit in the UNICHAR then it takes only what
|
||||
// will fit.
|
||||
UNICHAR(const char *utf8_str, int len);
|
||||
|
||||
// Construct from a single UCS4 character.
|
||||
explicit UNICHAR(int unicode);
|
||||
|
||||
// Default copy constructor and operator= are OK.
|
||||
|
||||
// Get the first character as UCS-4.
|
||||
int first_uni() const;
|
||||
|
||||
// Get the length of the UTF8 string.
|
||||
int utf8_len() const {
|
||||
int len = chars[UNICHAR_LEN - 1];
|
||||
return len >= 0 && len < UNICHAR_LEN ? len : UNICHAR_LEN;
|
||||
}
|
||||
|
||||
// Get a UTF8 string, but NOT nullptr terminated.
|
||||
const char *utf8() const {
|
||||
return chars;
|
||||
}
|
||||
|
||||
// Get a terminated UTF8 string: Must delete[] it after use.
|
||||
char *utf8_str() const;
|
||||
|
||||
// Get the number of bytes in the first character of the given utf8 string.
|
||||
static int utf8_step(const char *utf8_str);
|
||||
|
||||
// A class to simplify iterating over and accessing elements of a UTF8
|
||||
// string. Note that unlike the UNICHAR class, const_iterator does NOT COPY or
|
||||
// take ownership of the underlying byte array. It also does not permit
|
||||
// modification of the array (as the name suggests).
|
||||
//
|
||||
// Example:
|
||||
// for (UNICHAR::const_iterator it = UNICHAR::begin(str, str_len);
|
||||
// it != UNICHAR::end(str, len);
|
||||
// ++it) {
|
||||
// printf("UCS-4 symbol code = %d\n", *it);
|
||||
// char buf[5];
|
||||
// int char_len = it.get_utf8(buf); buf[char_len] = '\0';
|
||||
// printf("Char = %s\n", buf);
|
||||
// }
|
||||
class TESS_API const_iterator {
|
||||
using CI = const_iterator;
|
||||
|
||||
public:
|
||||
// Step to the next UTF8 character.
|
||||
// If the current position is at an illegal UTF8 character, then print an
|
||||
// error message and step by one byte. If the current position is at a
|
||||
// nullptr value, don't step past it.
|
||||
const_iterator &operator++();
|
||||
|
||||
// Return the UCS-4 value at the current position.
|
||||
// If the current position is at an illegal UTF8 value, return a single
|
||||
// space character.
|
||||
int operator*() const;
|
||||
|
||||
// Store the UTF-8 encoding of the current codepoint into buf, which must be
|
||||
// at least 4 bytes long. Return the number of bytes written.
|
||||
// If the current position is at an illegal UTF8 value, writes a single
|
||||
// space character and returns 1.
|
||||
// Note that this method does not null-terminate the buffer.
|
||||
int get_utf8(char *buf) const;
|
||||
// Returns the number of bytes of the current codepoint. Returns 1 if the
|
||||
// current position is at an illegal UTF8 value.
|
||||
int utf8_len() const;
|
||||
// Returns true if the UTF-8 encoding at the current position is legal.
|
||||
bool is_legal() const;
|
||||
|
||||
// Return the pointer into the string at the current position.
|
||||
const char *utf8_data() const {
|
||||
return it_;
|
||||
}
|
||||
|
||||
// Iterator equality operators.
|
||||
friend bool operator==(const CI &lhs, const CI &rhs) {
|
||||
return lhs.it_ == rhs.it_;
|
||||
}
|
||||
friend bool operator!=(const CI &lhs, const CI &rhs) {
|
||||
return !(lhs == rhs);
|
||||
}
|
||||
|
||||
private:
|
||||
friend class UNICHAR;
|
||||
explicit const_iterator(const char *it) : it_(it) {}
|
||||
|
||||
const char *it_; // Pointer into the string.
|
||||
};
|
||||
|
||||
// Create a start/end iterator pointing to a string. Note that these methods
|
||||
// are static and do NOT create a copy or take ownership of the underlying
|
||||
// array.
|
||||
static const_iterator begin(const char *utf8_str, int byte_length);
|
||||
static const_iterator end(const char *utf8_str, int byte_length);
|
||||
|
||||
// Converts a utf-8 string to a vector of unicodes.
|
||||
// Returns an empty vector if the input contains invalid UTF-8.
|
||||
static std::vector<char32> UTF8ToUTF32(const char *utf8_str);
|
||||
// Converts a vector of unicodes to a utf8 string.
|
||||
// Returns an empty string if the input contains an invalid unicode.
|
||||
static std::string UTF32ToUTF8(const std::vector<char32> &str32);
|
||||
|
||||
private:
|
||||
// A UTF-8 representation of 1 or more Unicode characters.
|
||||
// The last element (chars[UNICHAR_LEN - 1]) is a length if
|
||||
// its value < UNICHAR_LEN, otherwise it is a genuine character.
|
||||
char chars[UNICHAR_LEN]{};
|
||||
};
|
||||
|
||||
} // namespace tesseract
|
||||
|
||||
#endif // TESSERACT_CCUTIL_UNICHAR_H_
|
|
@ -1,34 +0,0 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: version.h
|
||||
// Description: Version information
|
||||
//
|
||||
// (C) Copyright 2018, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef TESSERACT_API_VERSION_H_
|
||||
#define TESSERACT_API_VERSION_H_
|
||||
|
||||
// clang-format off
|
||||
|
||||
// Individual version components.
// NOTE(review): the @NAME@ tokens look like placeholders that the build
// system substitutes when generating the real header -- confirm against the
// build scripts; this file is not compilable as-is.
#define TESSERACT_MAJOR_VERSION @GENERIC_MAJOR_VERSION@
|
||||
#define TESSERACT_MINOR_VERSION @GENERIC_MINOR_VERSION@
|
||||
#define TESSERACT_MICRO_VERSION @GENERIC_MICRO_VERSION@
|
||||
|
||||
// Whole version packed into one integer expression:
// (major << 16) | (minor << 8) | micro.
#define TESSERACT_VERSION \
|
||||
(TESSERACT_MAJOR_VERSION << 16 | \
|
||||
TESSERACT_MINOR_VERSION << 8 | \
|
||||
TESSERACT_MICRO_VERSION)
|
||||
|
||||
// Version as a string literal (placeholder, see the note above).
#define TESSERACT_VERSION_STR "@PACKAGE_VERSION@"
|
||||
|
||||
// clang-format on
|
||||
|
||||
#endif // TESSERACT_API_VERSION_H_
|
|
@ -1,812 +0,0 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: baseapi.h
|
||||
// Description: Simple API for calling tesseract.
|
||||
// Author: Ray Smith
|
||||
//
|
||||
// (C) Copyright 2006, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef TESSERACT_API_BASEAPI_H_
|
||||
#define TESSERACT_API_BASEAPI_H_
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
# include "config_auto.h" // DISABLED_LEGACY_ENGINE
|
||||
#endif
|
||||
|
||||
#include "export.h"
|
||||
#include "pageiterator.h"
|
||||
#include "publictypes.h"
|
||||
#include "resultiterator.h"
|
||||
#include "unichar.h"
|
||||
|
||||
#include "version.h"
|
||||
|
||||
#include <cstdio>
|
||||
#include <vector> // for std::vector
|
||||
|
||||
struct Pix;
|
||||
struct Pixa;
|
||||
struct Boxa;
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
class PAGE_RES;
|
||||
class ParagraphModel;
|
||||
class BLOCK_LIST;
|
||||
class ETEXT_DESC;
|
||||
struct OSResults;
|
||||
class UNICHARSET;
|
||||
|
||||
class Dawg;
|
||||
class Dict;
|
||||
class EquationDetect;
|
||||
class PageIterator;
|
||||
class ImageThresholder;
|
||||
class LTRResultIterator;
|
||||
class ResultIterator;
|
||||
class MutableIterator;
|
||||
class TessResultRenderer;
|
||||
class Tesseract;
|
||||
|
||||
// Pointer to a function that reads a whole file, named by `filename`, into
// the std::vector<char> pointed to by `data`.
|
||||
// Returns false on failure.
|
||||
using FileReader = bool (*)(const char *filename, std::vector<char> *data);
|
||||
|
||||
// Pointer to a const member function of Dict with the signature
// int(void *, const UNICHARSET &, UNICHAR_ID, bool).
using DictFunc = int (Dict::*)(void *, const UNICHARSET &, UNICHAR_ID,
|
||||
bool) const;
|
||||
// Pointer to a non-const member function of Dict with the signature
// double(const char *, const char *, int, const char *, int).
// NOTE(review): the meaning of the individual arguments is not visible in
// this header -- see the Dict class.
using ProbabilityInContextFunc = double (Dict::*)(const char *, const char *,
|
||||
int, const char *, int);
|
||||
|
||||
/**
|
||||
* Base class for all tesseract APIs.
|
||||
* Specific classes can add ability to work on different inputs or produce
|
||||
* different outputs.
|
||||
* This class is mostly an interface layer on top of the Tesseract instance
|
||||
* class to hide the data types so that users of this class don't have to
|
||||
* include any other Tesseract headers.
|
||||
*/
|
||||
class TESS_API TessBaseAPI {
|
||||
public:
|
||||
TessBaseAPI();
|
||||
virtual ~TessBaseAPI();
|
||||
// Copy constructor and assignment operator are currently unsupported.
|
||||
TessBaseAPI(TessBaseAPI const &) = delete;
|
||||
TessBaseAPI &operator=(TessBaseAPI const &) = delete;
|
||||
|
||||
/**
|
||||
* Returns the version identifier as a static string. Do not delete.
|
||||
*/
|
||||
static const char *Version();
|
||||
|
||||
/**
|
||||
* If compiled with OpenCL AND an available OpenCL
|
||||
* device is deemed faster than serial code, then
|
||||
* "device" is populated with the cl_device_id
|
||||
* and returns sizeof(cl_device_id)
|
||||
* otherwise *device=nullptr and returns 0.
|
||||
*/
|
||||
static size_t getOpenCLDevice(void **device);
|
||||
|
||||
/**
|
||||
* Set the name of the input file. Needed for training and
|
||||
* reading a UNLV zone file, and for searchable PDF output.
|
||||
*/
|
||||
void SetInputName(const char *name);
|
||||
/**
|
||||
* These functions are required for searchable PDF output.
|
||||
* We need our hands on the input file so that we can include
|
||||
* it in the PDF without transcoding. If that is not possible,
|
||||
* we need the original image. Finally, resolution metadata
|
||||
* is stored in the PDF so we need that as well.
|
||||
*/
|
||||
const char *GetInputName();
|
||||
// Takes ownership of the input pix.
|
||||
void SetInputImage(Pix *pix);
|
||||
Pix *GetInputImage();
|
||||
int GetSourceYResolution();
|
||||
const char *GetDatapath();
|
||||
|
||||
/** Set the name of the bonus output files. Needed only for debugging. */
|
||||
void SetOutputName(const char *name);
|
||||
|
||||
/**
|
||||
* Set the value of an internal "parameter."
|
||||
* Supply the name of the parameter and the value as a string, just as
|
||||
* you would in a config file.
|
||||
* Returns false if the name lookup failed.
|
||||
* Eg SetVariable("tessedit_char_blacklist", "xyz"); to ignore x, y and z.
|
||||
* Or SetVariable("classify_bln_numeric_mode", "1"); to set numeric-only mode.
|
||||
* SetVariable may be used before Init, but settings will revert to
|
||||
* defaults on End().
|
||||
*
|
||||
* Note: Must be called after Init(). Only works for non-init variables
|
||||
* (init variables should be passed to Init()).
|
||||
*/
|
||||
bool SetVariable(const char *name, const char *value);
|
||||
bool SetDebugVariable(const char *name, const char *value);
|
||||
|
||||
/**
|
||||
* Returns true if the parameter was found among Tesseract parameters.
|
||||
* Fills in value with the value of the parameter.
|
||||
*/
|
||||
bool GetIntVariable(const char *name, int *value) const;
|
||||
bool GetBoolVariable(const char *name, bool *value) const;
|
||||
bool GetDoubleVariable(const char *name, double *value) const;
|
||||
|
||||
/**
|
||||
* Returns the pointer to the string that represents the value of the
|
||||
* parameter if it was found among Tesseract parameters.
|
||||
*/
|
||||
const char *GetStringVariable(const char *name) const;
|
||||
|
||||
#ifndef DISABLED_LEGACY_ENGINE
|
||||
|
||||
/**
|
||||
* Print Tesseract fonts table to the given file.
|
||||
*/
|
||||
void PrintFontsTable(FILE *fp) const;
|
||||
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Print Tesseract parameters to the given file.
|
||||
*/
|
||||
void PrintVariables(FILE *fp) const;
|
||||
|
||||
/**
|
||||
* Get value of named variable as a string, if it exists.
|
||||
*/
|
||||
bool GetVariableAsString(const char *name, std::string *val) const;
|
||||
|
||||
/**
|
||||
* Instances are now mostly thread-safe and totally independent,
|
||||
* but some global parameters remain. Basically it is safe to use multiple
|
||||
* TessBaseAPIs in different threads in parallel, UNLESS:
|
||||
* you use SetVariable on some of the Params in classify and textord.
|
||||
* If you do, then the effect will be to change it for all your instances.
|
||||
*
|
||||
* Start tesseract. Returns zero on success and -1 on failure.
|
||||
* NOTE that the only members that may be called before Init are those
|
||||
* listed above here in the class definition.
|
||||
*
|
||||
* The datapath must be the name of the tessdata directory.
|
||||
* The language is (usually) an ISO 639-3 string or nullptr will default to
|
||||
* eng. It is entirely safe (and eventually will be efficient too) to call
|
||||
* Init multiple times on the same instance to change language, or just
|
||||
* to reset the classifier.
|
||||
* The language may be a string of the form [~]<lang>[+[~]<lang>]* indicating
|
||||
* that multiple languages are to be loaded. Eg hin+eng will load Hindi and
|
||||
* English. Languages may specify internally that they want to be loaded
|
||||
* with one or more other languages, so the ~ sign is available to override
|
||||
* that. Eg if hin were set to load eng by default, then hin+~eng would force
|
||||
* loading only hin. The number of loaded languages is limited only by
|
||||
* memory, with the caveat that loading additional languages will impact
|
||||
* both speed and accuracy, as there is more work to do to decide on the
|
||||
* applicable language, and there is more chance of hallucinating incorrect
|
||||
* words.
|
||||
* WARNING: On changing languages, all Tesseract parameters are reset
|
||||
* back to their default values. (Which may vary between languages.)
|
||||
* If you have a rare need to set a Variable that controls
|
||||
* initialization for a second call to Init you should explicitly
|
||||
* call End() and then use SetVariable before Init. This is only a very
|
||||
* rare use case, since there are very few uses that require any parameters
|
||||
* to be set before Init.
|
||||
*
|
||||
* If set_only_non_debug_params is true, only params that do not contain
|
||||
* "debug" in the name will be set.
|
||||
*/
|
||||
int Init(const char *datapath, const char *language, OcrEngineMode mode,
|
||||
char **configs, int configs_size,
|
||||
const std::vector<std::string> *vars_vec,
|
||||
const std::vector<std::string> *vars_values,
|
||||
bool set_only_non_debug_params);
|
||||
int Init(const char *datapath, const char *language, OcrEngineMode oem) {
|
||||
return Init(datapath, language, oem, nullptr, 0, nullptr, nullptr, false);
|
||||
}
|
||||
int Init(const char *datapath, const char *language) {
|
||||
return Init(datapath, language, OEM_DEFAULT, nullptr, 0, nullptr, nullptr,
|
||||
false);
|
||||
}
|
||||
// In-memory version reads the traineddata file directly from the given
|
||||
// data[data_size] array, and/or reads data via a FileReader.
|
||||
int Init(const char *data, int data_size, const char *language,
|
||||
OcrEngineMode mode, char **configs, int configs_size,
|
||||
const std::vector<std::string> *vars_vec,
|
||||
const std::vector<std::string> *vars_values,
|
||||
bool set_only_non_debug_params, FileReader reader);
|
||||
|
||||
/**
|
||||
* Returns the languages string used in the last valid initialization.
|
||||
* If the last initialization specified "deu+hin" then that will be
|
||||
* returned. If hin loaded eng automatically as well, then that will
|
||||
* not be included in this list. To find the languages actually
|
||||
* loaded use GetLoadedLanguagesAsVector.
|
||||
* The returned string should NOT be deleted.
|
||||
*/
|
||||
const char *GetInitLanguagesAsString() const;
|
||||
|
||||
/**
|
||||
* Returns the loaded languages in the vector of std::string.
|
||||
* Includes all languages loaded by the last Init, including those loaded
|
||||
* as dependencies of other loaded languages.
|
||||
*/
|
||||
void GetLoadedLanguagesAsVector(std::vector<std::string> *langs) const;
|
||||
|
||||
/**
|
||||
* Returns the available languages in the sorted vector of std::string.
|
||||
*/
|
||||
void GetAvailableLanguagesAsVector(std::vector<std::string> *langs) const;
|
||||
|
||||
/**
|
||||
* Init only for page layout analysis. Use only for calls to SetImage and
|
||||
* AnalysePage. Calls that attempt recognition will generate an error.
|
||||
*/
|
||||
void InitForAnalysePage();
|
||||
|
||||
/**
|
||||
* Read a "config" file containing a set of param, value pairs.
|
||||
* Searches the standard places: tessdata/configs, tessdata/tessconfigs
|
||||
* and also accepts a relative or absolute path name.
|
||||
* Note: only non-init params will be set (init params are set by Init()).
|
||||
*/
|
||||
void ReadConfigFile(const char *filename);
|
||||
/** Same as above, but only set debug params from the given config file. */
|
||||
void ReadDebugConfigFile(const char *filename);
|
||||
|
||||
/**
|
||||
* Set the current page segmentation mode. Defaults to PSM_SINGLE_BLOCK.
|
||||
* The mode is stored as an IntParam so it can also be modified by
|
||||
* ReadConfigFile or SetVariable("tessedit_pageseg_mode", mode as string).
|
||||
*/
|
||||
void SetPageSegMode(PageSegMode mode);
|
||||
|
||||
/** Return the current page segmentation mode. */
|
||||
PageSegMode GetPageSegMode() const;
|
||||
|
||||
/**
|
||||
* Recognize a rectangle from an image and return the result as a string.
|
||||
* May be called many times for a single Init.
|
||||
* Currently has no error checking.
|
||||
* Greyscale of 8 and color of 24 or 32 bits per pixel may be given.
|
||||
* Palette color images will not work properly and must be converted to
|
||||
* 24 bit.
|
||||
* Binary images of 1 bit per pixel may also be given but they must be
|
||||
* byte packed with the MSB of the first byte being the first pixel, and a
|
||||
* 1 represents WHITE. For binary images set bytes_per_pixel=0.
|
||||
* The recognized text is returned as a char* which is coded
|
||||
* as UTF8 and must be freed with the delete [] operator.
|
||||
*
|
||||
* Note that TesseractRect is the simplified convenience interface.
|
||||
* For advanced uses, use SetImage, (optionally) SetRectangle, Recognize,
|
||||
* and one or more of the Get*Text functions below.
|
||||
*/
|
||||
char *TesseractRect(const unsigned char *imagedata, int bytes_per_pixel,
|
||||
int bytes_per_line, int left, int top, int width,
|
||||
int height);
|
||||
|
||||
/**
|
||||
* Call between pages or documents etc to free up memory and forget
|
||||
* adaptive data.
|
||||
*/
|
||||
void ClearAdaptiveClassifier();
|
||||
|
||||
/**
|
||||
* @defgroup AdvancedAPI Advanced API
|
||||
* The following methods break TesseractRect into pieces, so you can
|
||||
* get hold of the thresholded image, get the text in different formats,
|
||||
* get bounding boxes, confidences etc.
|
||||
*/
|
||||
/* @{ */
|
||||
|
||||
/**
|
||||
* Provide an image for Tesseract to recognize. Format is as
|
||||
* TesseractRect above. Copies the image buffer and converts to Pix.
|
||||
* SetImage clears all recognition results, and sets the rectangle to the
|
||||
* full image, so it may be followed immediately by a GetUTF8Text, and it
|
||||
* will automatically perform recognition.
|
||||
*/
|
||||
void SetImage(const unsigned char *imagedata, int width, int height,
|
||||
int bytes_per_pixel, int bytes_per_line);
|
||||
|
||||
/**
|
||||
* Provide an image for Tesseract to recognize. As with SetImage above,
|
||||
* Tesseract takes its own copy of the image, so it need not persist until
|
||||
* after Recognize.
|
||||
* Pix vs raw, which to use?
|
||||
* Use Pix where possible. Tesseract uses Pix as its internal representation
|
||||
* and it is therefore more efficient to provide a Pix directly.
|
||||
*/
|
||||
void SetImage(Pix *pix);
|
||||
|
||||
/**
|
||||
* Set the resolution of the source image in pixels per inch so font size
|
||||
* information can be calculated in results. Call this after SetImage().
|
||||
*/
|
||||
void SetSourceResolution(int ppi);
|
||||
|
||||
/**
|
||||
* Restrict recognition to a sub-rectangle of the image. Call after SetImage.
|
||||
* Each SetRectangle clears the recognition results so multiple rectangles
|
||||
* can be recognized with the same image.
|
||||
*/
|
||||
void SetRectangle(int left, int top, int width, int height);
|
||||
|
||||
/**
|
||||
* Get a copy of the internal thresholded image from Tesseract.
|
||||
* Caller takes ownership of the Pix and must pixDestroy it.
|
||||
* May be called any time after SetImage, or after TesseractRect.
|
||||
*/
|
||||
Pix *GetThresholdedImage();
|
||||
|
||||
/**
|
||||
* Get the result of page layout analysis as a leptonica-style
|
||||
* Boxa, Pixa pair, in reading order.
|
||||
* Can be called before or after Recognize.
|
||||
*/
|
||||
Boxa *GetRegions(Pixa **pixa);
|
||||
|
||||
/**
|
||||
* Get the textlines as a leptonica-style
|
||||
* Boxa, Pixa pair, in reading order.
|
||||
* Can be called before or after Recognize.
|
||||
* If raw_image is true, then extract from the original image instead of the
|
||||
* thresholded image and pad by raw_padding pixels.
|
||||
* If blockids is not nullptr, the block-id of each line is also returned as
|
||||
* an array of one element per line. delete [] after use. If paraids is not
|
||||
* nullptr, the paragraph-id of each line within its block is also returned as
|
||||
* an array of one element per line. delete [] after use.
|
||||
*/
|
||||
Boxa *GetTextlines(bool raw_image, int raw_padding, Pixa **pixa,
|
||||
int **blockids, int **paraids);
|
||||
/*
|
||||
Helper method to extract from the thresholded image. (most common usage)
|
||||
*/
|
||||
Boxa *GetTextlines(Pixa **pixa, int **blockids) {
|
||||
return GetTextlines(false, 0, pixa, blockids, nullptr);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get textlines and strips of image regions as a leptonica-style Boxa, Pixa
|
||||
* pair, in reading order. Enables downstream handling of non-rectangular
|
||||
* regions.
|
||||
* Can be called before or after Recognize.
|
||||
* If blockids is not nullptr, the block-id of each line is also returned as
|
||||
* an array of one element per line. delete [] after use.
|
||||
*/
|
||||
Boxa *GetStrips(Pixa **pixa, int **blockids);
|
||||
|
||||
/**
|
||||
* Get the words as a leptonica-style
|
||||
* Boxa, Pixa pair, in reading order.
|
||||
* Can be called before or after Recognize.
|
||||
*/
|
||||
Boxa *GetWords(Pixa **pixa);
|
||||
|
||||
/**
|
||||
* Gets the individual connected (text) components (created
|
||||
* after the page segmentation step, but before recognition)
|
||||
* as a leptonica-style Boxa, Pixa pair, in reading order.
|
||||
* Can be called before or after Recognize.
|
||||
* Note: the caller is responsible for calling boxaDestroy()
|
||||
* on the returned Boxa array and pixaDestroy() on cc array.
|
||||
*/
|
||||
Boxa *GetConnectedComponents(Pixa **cc);
|
||||
|
||||
/**
|
||||
* Get the given level kind of components (block, textline, word etc.) as a
|
||||
* leptonica-style Boxa, Pixa pair, in reading order.
|
||||
* Can be called before or after Recognize.
|
||||
* If blockids is not nullptr, the block-id of each component is also returned
|
||||
* as an array of one element per component. delete [] after use.
|
||||
* If paraids is not nullptr, the paragraph-id of each component within its
|
||||
* block is also returned as an array of one element per component. delete []
|
||||
* after use. If raw_image is true, then portions of the original image are
|
||||
* extracted instead of the thresholded image and padded with raw_padding. If
|
||||
* text_only is true, then only text components are returned.
|
||||
*/
|
||||
Boxa *GetComponentImages(PageIteratorLevel level, bool text_only,
|
||||
bool raw_image, int raw_padding, Pixa **pixa,
|
||||
int **blockids, int **paraids);
|
||||
// Helper function to get binary images with no padding (most common usage).
|
||||
Boxa *GetComponentImages(const PageIteratorLevel level, const bool text_only,
|
||||
Pixa **pixa, int **blockids) {
|
||||
return GetComponentImages(level, text_only, false, 0, pixa, blockids,
|
||||
nullptr);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the scale factor of the thresholded image that would be returned by
|
||||
* GetThresholdedImage() and the various GetX() methods that call
|
||||
* GetComponentImages().
|
||||
* Returns 0 if no thresholder has been set.
|
||||
*/
|
||||
int GetThresholdedImageScaleFactor() const;
|
||||
|
||||
/**
|
||||
* Runs page layout analysis in the mode set by SetPageSegMode.
|
||||
* May optionally be called prior to Recognize to get access to just
|
||||
* the page layout results. Returns an iterator to the results.
|
||||
* If merge_similar_words is true, words are combined where suitable for use
|
||||
* with a line recognizer. Use if you want to use AnalyseLayout to find the
|
||||
* textlines, and then want to process textline fragments with an external
|
||||
* line recognizer.
|
||||
* Returns nullptr on error or an empty page.
|
||||
* The returned iterator must be deleted after use.
|
||||
* WARNING! This class points to data held within the TessBaseAPI class, and
|
||||
* therefore can only be used while the TessBaseAPI class still exists and
|
||||
* has not been subjected to a call of Init, SetImage, Recognize, Clear, End,
|
||||
* DetectOS, or anything else that changes the internal PAGE_RES.
|
||||
*/
|
||||
PageIterator *AnalyseLayout();
|
||||
PageIterator *AnalyseLayout(bool merge_similar_words);
|
||||
|
||||
/**
|
||||
* Recognize the image from SetAndThresholdImage, generating Tesseract
|
||||
* internal structures. Returns 0 on success.
|
||||
* Optional. The Get*Text functions below will call Recognize if needed.
|
||||
* After Recognize, the output is kept internally until the next SetImage.
|
||||
*/
|
||||
int Recognize(ETEXT_DESC *monitor);
|
||||
|
||||
/**
|
||||
* Methods to retrieve information after SetAndThresholdImage(),
|
||||
* Recognize() or TesseractRect(). (Recognize is called implicitly if needed.)
|
||||
*/
|
||||
|
||||
/**
|
||||
* Turns images into symbolic text.
|
||||
*
|
||||
* filename can point to a single image, a multi-page TIFF,
|
||||
* or a plain text list of image filenames.
|
||||
*
|
||||
* retry_config is useful for debugging. If not nullptr, you can fall
|
||||
* back to an alternate configuration if a page fails for some
|
||||
* reason.
|
||||
*
|
||||
* timeout_millisec terminates processing if any single page
|
||||
* takes too long. Set to 0 for unlimited time.
|
||||
*
|
||||
* renderer is responsible for creating the output. For example,
|
||||
* use the TessTextRenderer if you want plaintext output, or
|
||||
* the TessPDFRenderer to produce searchable PDF.
|
||||
*
|
||||
* If tessedit_page_number is non-negative, will only process that
|
||||
* single page. Works for multi-page tiff file, or filelist.
|
||||
*
|
||||
* Returns true if successful, false on error.
|
||||
*/
|
||||
bool ProcessPages(const char *filename, const char *retry_config,
|
||||
int timeout_millisec, TessResultRenderer *renderer);
|
||||
// Does the real work of ProcessPages.
|
||||
bool ProcessPagesInternal(const char *filename, const char *retry_config,
|
||||
int timeout_millisec, TessResultRenderer *renderer);
|
||||
|
||||
/**
|
||||
* Turn a single image into symbolic text.
|
||||
*
|
||||
* The pix is the image processed. filename and page_index are
|
||||
* metadata used by side-effect processes, such as reading a box
|
||||
* file or formatting as hOCR.
|
||||
*
|
||||
* See ProcessPages for descriptions of other parameters.
|
||||
*/
|
||||
bool ProcessPage(Pix *pix, int page_index, const char *filename,
|
||||
const char *retry_config, int timeout_millisec,
|
||||
TessResultRenderer *renderer);
|
||||
|
||||
/**
|
||||
* Get a reading-order iterator to the results of LayoutAnalysis and/or
|
||||
* Recognize. The returned iterator must be deleted after use.
|
||||
* WARNING! This class points to data held within the TessBaseAPI class, and
|
||||
* therefore can only be used while the TessBaseAPI class still exists and
|
||||
* has not been subjected to a call of Init, SetImage, Recognize, Clear, End,
|
||||
* DetectOS, or anything else that changes the internal PAGE_RES.
|
||||
*/
|
||||
ResultIterator *GetIterator();
|
||||
|
||||
/**
|
||||
* Get a mutable iterator to the results of LayoutAnalysis and/or Recognize.
|
||||
* The returned iterator must be deleted after use.
|
||||
* WARNING! This class points to data held within the TessBaseAPI class, and
|
||||
* therefore can only be used while the TessBaseAPI class still exists and
|
||||
* has not been subjected to a call of Init, SetImage, Recognize, Clear, End,
|
||||
* DetectOS, or anything else that changes the internal PAGE_RES.
|
||||
*/
|
||||
MutableIterator *GetMutableIterator();
|
||||
|
||||
/**
|
||||
* The recognized text is returned as a char* which is coded
|
||||
* as UTF8 and must be freed with the delete [] operator.
|
||||
*/
|
||||
char *GetUTF8Text();
|
||||
|
||||
/**
|
||||
* Make a HTML-formatted string with hOCR markup from the internal
|
||||
* data structures.
|
||||
* page_number is 0-based but will appear in the output as 1-based.
|
||||
* monitor can be used to
|
||||
* cancel the recognition
|
||||
* receive progress callbacks
|
||||
* Returned string must be freed with the delete [] operator.
|
||||
*/
|
||||
char *GetHOCRText(ETEXT_DESC *monitor, int page_number);
|
||||
|
||||
/**
|
||||
* Make a HTML-formatted string with hOCR markup from the internal
|
||||
* data structures.
|
||||
* page_number is 0-based but will appear in the output as 1-based.
|
||||
* Returned string must be freed with the delete [] operator.
|
||||
*/
|
||||
char *GetHOCRText(int page_number);
|
||||
|
||||
/**
|
||||
* Make an XML-formatted string with Alto markup from the internal
|
||||
* data structures.
|
||||
*/
|
||||
char *GetAltoText(ETEXT_DESC *monitor, int page_number);
|
||||
|
||||
/**
|
||||
* Make an XML-formatted string with Alto markup from the internal
|
||||
* data structures.
|
||||
*/
|
||||
char *GetAltoText(int page_number);
|
||||
|
||||
/**
|
||||
* Make a TSV-formatted string from the internal data structures.
|
||||
* page_number is 0-based but will appear in the output as 1-based.
|
||||
* Returned string must be freed with the delete [] operator.
|
||||
*/
|
||||
char *GetTSVText(int page_number);
|
||||
|
||||
/**
|
||||
* Make a box file for LSTM training from the internal data structures.
|
||||
* Constructs coordinates in the original image - not just the rectangle.
|
||||
* page_number is a 0-based page index that will appear in the box file.
|
||||
* Returned string must be freed with the delete [] operator.
|
||||
*/
|
||||
char *GetLSTMBoxText(int page_number);
|
||||
|
||||
/**
|
||||
* The recognized text is returned as a char* which is coded in the same
|
||||
* format as a box file used in training.
|
||||
* Constructs coordinates in the original image - not just the rectangle.
|
||||
* page_number is a 0-based page index that will appear in the box file.
|
||||
* Returned string must be freed with the delete [] operator.
|
||||
*/
|
||||
char *GetBoxText(int page_number);
|
||||
|
||||
/**
|
||||
* The recognized text is returned as a char* which is coded in the same
|
||||
* format as a WordStr box file used in training.
|
||||
* page_number is a 0-based page index that will appear in the box file.
|
||||
* Returned string must be freed with the delete [] operator.
|
||||
*/
|
||||
char *GetWordStrBoxText(int page_number);
|
||||
|
||||
/**
|
||||
* The recognized text is returned as a char* which is coded
|
||||
* as UNLV format Latin-1 with specific reject and suspect codes.
|
||||
* Returned string must be freed with the delete [] operator.
|
||||
*/
|
||||
char *GetUNLVText();
|
||||
|
||||
/**
|
||||
* Detect the orientation of the input image and apparent script (alphabet).
|
||||
* orient_deg is the detected clockwise rotation of the input image in degrees
|
||||
* (0, 90, 180, 270)
|
||||
* orient_conf is the confidence (15.0 is reasonably confident)
|
||||
* script_name is an ASCII string, the name of the script, e.g. "Latin"
|
||||
* script_conf is confidence level in the script
|
||||
* Returns true on success and writes values to each parameter as an output
|
||||
*/
|
||||
bool DetectOrientationScript(int *orient_deg, float *orient_conf,
|
||||
const char **script_name, float *script_conf);
|
||||
|
||||
/**
|
||||
* The recognized text is returned as a char* which is coded
|
||||
* as UTF8 and must be freed with the delete [] operator.
|
||||
* page_number is a 0-based page index that will appear in the osd file.
|
||||
*/
|
||||
char *GetOsdText(int page_number);
|
||||
|
||||
/** Returns the (average) confidence value between 0 and 100. */
|
||||
int MeanTextConf();
|
||||
/**
|
||||
* Returns all word confidences (between 0 and 100) in an array, terminated
|
||||
* by -1. The calling function must delete [] after use.
|
||||
* The number of confidences should correspond to the number of space-
|
||||
* delimited words in GetUTF8Text.
|
||||
*/
|
||||
int *AllWordConfidences();
|
||||
|
||||
#ifndef DISABLED_LEGACY_ENGINE
|
||||
/**
|
||||
* Applies the given word to the adaptive classifier if possible.
|
||||
* The word must be SPACE-DELIMITED UTF-8 - l i k e t h i s , so it can
|
||||
* tell the boundaries of the graphemes.
|
||||
* Assumes that SetImage/SetRectangle have been used to set the image
|
||||
* to the given word. The mode arg should be PSM_SINGLE_WORD or
|
||||
* PSM_CIRCLE_WORD, as that will be used to control layout analysis.
|
||||
* The currently set PageSegMode is preserved.
|
||||
* Returns false if adaption was not possible for some reason.
|
||||
*/
|
||||
bool AdaptToWordStr(PageSegMode mode, const char *wordstr);
|
||||
#endif // ndef DISABLED_LEGACY_ENGINE
|
||||
|
||||
/**
|
||||
* Free up recognition results and any stored image data, without actually
|
||||
* freeing any recognition data that would be time-consuming to reload.
|
||||
* Afterwards, you must call SetImage or TesseractRect before doing
|
||||
* any Recognize or Get* operation.
|
||||
*/
|
||||
void Clear();
|
||||
|
||||
/**
|
||||
* Close down tesseract and free up all memory. End() is equivalent to
|
||||
* destructing and reconstructing your TessBaseAPI.
|
||||
* Once End() has been used, none of the other API functions may be used
|
||||
* other than Init and anything declared above it in the class definition.
|
||||
*/
|
||||
void End();
|
||||
|
||||
/**
|
||||
* Clear any library-level memory caches.
|
||||
* There are a variety of expensive-to-load constant data structures (mostly
|
||||
* language dictionaries) that are cached globally -- surviving the Init()
|
||||
* and End() of individual TessBaseAPI's. This function allows the clearing
|
||||
* of these caches.
|
||||
**/
|
||||
static void ClearPersistentCache();
|
||||
|
||||
/**
|
||||
* Check whether a word is valid according to Tesseract's language model
|
||||
* @return 0 if the word is invalid, non-zero if valid.
|
||||
* @warning temporary! This function will be removed from here and placed
|
||||
* in a separate API at some future time.
|
||||
*/
|
||||
int IsValidWord(const char *word) const;
|
||||
// Returns true if utf8_character is defined in the UniCharset.
|
||||
bool IsValidCharacter(const char *utf8_character) const;
|
||||
|
||||
bool GetTextDirection(int *out_offset, float *out_slope);
|
||||
|
||||
/** Sets Dict::letter_is_okay_ function to point to the given function. */
|
||||
void SetDictFunc(DictFunc f);
|
||||
|
||||
/** Sets Dict::probability_in_context_ function to point to the given
|
||||
* function.
|
||||
*/
|
||||
void SetProbabilityInContextFunc(ProbabilityInContextFunc f);
|
||||
|
||||
/**
|
||||
* Estimates the Orientation And Script of the image.
|
||||
* @return true if the image was processed successfully.
|
||||
*/
|
||||
bool DetectOS(OSResults *);
|
||||
|
||||
/**
|
||||
* Return text orientation of each block as determined by an earlier run
|
||||
* of layout analysis.
|
||||
*/
|
||||
void GetBlockTextOrientations(int **block_orientation,
|
||||
bool **vertical_writing);
|
||||
|
||||
/** This method returns the string form of the specified unichar. */
|
||||
const char *GetUnichar(int unichar_id) const;
|
||||
|
||||
/** Return the pointer to the i-th dawg loaded into tesseract_ object. */
|
||||
const Dawg *GetDawg(int i) const;
|
||||
|
||||
/** Return the number of dawgs loaded into tesseract_ object. */
|
||||
int NumDawgs() const;
|
||||
|
||||
/** Returns the tesseract_ member (the underlying data object). */
Tesseract *tesseract() const {
  return this->tesseract_;
}
|
||||
|
||||
/** Returns last_oem_requested_, the OCR engine mode most recently requested. */
OcrEngineMode oem() const {
  return this->last_oem_requested_;
}
|
||||
|
||||
void set_min_orientation_margin(double margin);
|
||||
/* @} */
|
||||
|
||||
protected:
|
||||
/** Common code for setting the image. Returns true if Init has been called.
|
||||
*/
|
||||
bool InternalSetImage();
|
||||
|
||||
/**
|
||||
* Run the thresholder to make the thresholded image. If pix is not nullptr,
|
||||
* the source is thresholded to pix instead of the internal IMAGE.
|
||||
*/
|
||||
virtual bool Threshold(Pix **pix);
|
||||
|
||||
/**
|
||||
* Find lines from the image making the BLOCK_LIST.
|
||||
* @return 0 on success.
|
||||
*/
|
||||
int FindLines();
|
||||
|
||||
/** Delete the pageres and block list ready for a new page. */
|
||||
void ClearResults();
|
||||
|
||||
/**
|
||||
* Return an LTR Result Iterator -- used only for training, as we really want
|
||||
* to ignore all BiDi smarts at that point.
|
||||
* delete once you're done with it.
|
||||
*/
|
||||
LTRResultIterator *GetLTRIterator();
|
||||
|
||||
/**
|
||||
* Return the length of the output text string, as UTF8, assuming
|
||||
* one newline per line and one per block, with a terminator,
|
||||
* and assuming a single character reject marker for each rejected character.
|
||||
* Also return the number of recognized blobs in blob_count.
|
||||
*/
|
||||
int TextLength(int *blob_count) const;
|
||||
|
||||
//// paragraphs.cpp ////////////////////////////////////////////////////
|
||||
void DetectParagraphs(bool after_text_recognition);
|
||||
|
||||
/** Returns a read-only pointer to page_res_, the page-level data. */
const PAGE_RES *GetPageRes() const {
  return this->page_res_;
}
|
||||
|
||||
protected:
|
||||
Tesseract *tesseract_; ///< The underlying data object.
|
||||
Tesseract *osd_tesseract_; ///< For orientation & script detection.
|
||||
EquationDetect *equ_detect_; ///< The equation detector.
|
||||
FileReader reader_; ///< Reads files from any filesystem.
|
||||
ImageThresholder *thresholder_; ///< Image thresholding module.
|
||||
std::vector<ParagraphModel *> *paragraph_models_;
|
||||
BLOCK_LIST *block_list_; ///< The page layout.
|
||||
PAGE_RES *page_res_; ///< The page-level data.
|
||||
std::string input_file_; ///< Name used by training code.
|
||||
std::string output_file_; ///< Name used by debug code.
|
||||
std::string datapath_; ///< Current location of tessdata.
|
||||
std::string language_; ///< Last initialized language.
|
||||
OcrEngineMode last_oem_requested_; ///< Last ocr language mode requested.
|
||||
bool recognition_done_; ///< page_res_ contains recognition data.
|
||||
|
||||
/**
|
||||
* @defgroup ThresholderParams Thresholder Parameters
|
||||
* Parameters saved from the Thresholder. Needed to rebuild coordinates.
|
||||
*/
|
||||
/* @{ */
|
||||
int rect_left_;
|
||||
int rect_top_;
|
||||
int rect_width_;
|
||||
int rect_height_;
|
||||
int image_width_;
|
||||
int image_height_;
|
||||
/* @} */
|
||||
|
||||
private:
|
||||
// A list of image filenames gets special consideration
|
||||
bool ProcessPagesFileList(FILE *fp, std::string *buf,
|
||||
const char *retry_config, int timeout_millisec,
|
||||
TessResultRenderer *renderer,
|
||||
int tessedit_page_number);
|
||||
// TIFF supports multipage so gets special consideration.
|
||||
bool ProcessPagesMultipageTiff(const unsigned char *data, size_t size,
|
||||
const char *filename, const char *retry_config,
|
||||
int timeout_millisec,
|
||||
TessResultRenderer *renderer,
|
||||
int tessedit_page_number);
|
||||
}; // class TessBaseAPI.
|
||||
|
||||
/** Escape a char string - replace &<>"' with HTML codes. */
|
||||
std::string HOcrEscape(const char *text);
|
||||
|
||||
} // namespace tesseract
|
||||
|
||||
#endif // TESSERACT_API_BASEAPI_H_
|
|
@ -1,484 +0,0 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: capi.h
|
||||
// Description: C-API TessBaseAPI
|
||||
//
|
||||
// (C) Copyright 2012, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef API_CAPI_H_
|
||||
#define API_CAPI_H_
|
||||
|
||||
#include "export.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
# include <tesseract/baseapi.h>
|
||||
# include <tesseract/ocrclass.h>
|
||||
# include <tesseract/pageiterator.h>
|
||||
# include <tesseract/renderer.h>
|
||||
# include <tesseract/resultiterator.h>
|
||||
#endif
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#ifndef BOOL
|
||||
# define BOOL int
|
||||
# define TRUE 1
|
||||
# define FALSE 0
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
// C++ builds only: the C-API handle and enum names are plain aliases of the
// corresponding types in namespace tesseract.
using TessResultRenderer = tesseract::TessResultRenderer;
using TessBaseAPI = tesseract::TessBaseAPI;
using TessPageIterator = tesseract::PageIterator;
using TessResultIterator = tesseract::ResultIterator;
using TessMutableIterator = tesseract::MutableIterator;
using TessChoiceIterator = tesseract::ChoiceIterator;
using TessOcrEngineMode = tesseract::OcrEngineMode;
using TessPageSegMode = tesseract::PageSegMode;
using TessPageIteratorLevel = tesseract::PageIteratorLevel;
using TessOrientation = tesseract::Orientation;
using TessParagraphJustification = tesseract::ParagraphJustification;
using TessWritingDirection = tesseract::WritingDirection;
using TessTextlineOrder = tesseract::TextlineOrder;
using TessPolyBlockType = tesseract::PolyBlockType;
using ETEXT_DESC = tesseract::ETEXT_DESC;
|
||||
#else
|
||||
typedef struct TessResultRenderer TessResultRenderer;
|
||||
typedef struct TessBaseAPI TessBaseAPI;
|
||||
typedef struct TessPageIterator TessPageIterator;
|
||||
typedef struct TessResultIterator TessResultIterator;
|
||||
typedef struct TessMutableIterator TessMutableIterator;
|
||||
typedef struct TessChoiceIterator TessChoiceIterator;
|
||||
/* C-only definition of TessOcrEngineMode. In C++ builds the same name is an
 * alias of tesseract::OcrEngineMode (see the __cplusplus branch above).
 * NOTE(review): the enumerator order presumably has to match that C++ enum -
 * confirm against the C++ header before reordering or inserting values. */
typedef enum TessOcrEngineMode {
|
||||
OEM_TESSERACT_ONLY,
|
||||
OEM_LSTM_ONLY,
|
||||
OEM_TESSERACT_LSTM_COMBINED,
|
||||
OEM_DEFAULT
|
||||
} TessOcrEngineMode;
|
||||
/* C-only definition of TessPageSegMode. In C++ builds the same name is an
 * alias of tesseract::PageSegMode (see the __cplusplus branch above).
 * NOTE(review): the enumerator order presumably has to match that C++ enum,
 * and PSM_COUNT looks like a trailing count sentinel rather than a real
 * mode - confirm against the C++ header. */
typedef enum TessPageSegMode {
|
||||
PSM_OSD_ONLY,
|
||||
PSM_AUTO_OSD,
|
||||
PSM_AUTO_ONLY,
|
||||
PSM_AUTO,
|
||||
PSM_SINGLE_COLUMN,
|
||||
PSM_SINGLE_BLOCK_VERT_TEXT,
|
||||
PSM_SINGLE_BLOCK,
|
||||
PSM_SINGLE_LINE,
|
||||
PSM_SINGLE_WORD,
|
||||
PSM_CIRCLE_WORD,
|
||||
PSM_SINGLE_CHAR,
|
||||
PSM_SPARSE_TEXT,
|
||||
PSM_SPARSE_TEXT_OSD,
|
||||
PSM_RAW_LINE,
|
||||
PSM_COUNT
|
||||
} TessPageSegMode;
|
||||
/* C-only definition of TessPageIteratorLevel. In C++ builds the same name is
 * an alias of tesseract::PageIteratorLevel (see the __cplusplus branch above).
 * NOTE(review): the enumerator order presumably has to match that C++ enum -
 * confirm against the C++ header before reordering or inserting values. */
typedef enum TessPageIteratorLevel {
|
||||
RIL_BLOCK,
|
||||
RIL_PARA,
|
||||
RIL_TEXTLINE,
|
||||
RIL_WORD,
|
||||
RIL_SYMBOL
|
||||
} TessPageIteratorLevel;
|
||||
/* C-only definition of TessPolyBlockType. In C++ builds the same name is an
 * alias of tesseract::PolyBlockType (see the __cplusplus branch above).
 * NOTE(review): the enumerator order presumably has to match that C++ enum,
 * and PT_COUNT looks like a trailing count sentinel rather than a real block
 * type - confirm against the C++ header. */
typedef enum TessPolyBlockType {
|
||||
PT_UNKNOWN,
|
||||
PT_FLOWING_TEXT,
|
||||
PT_HEADING_TEXT,
|
||||
PT_PULLOUT_TEXT,
|
||||
PT_EQUATION,
|
||||
PT_INLINE_EQUATION,
|
||||
PT_TABLE,
|
||||
PT_VERTICAL_TEXT,
|
||||
PT_CAPTION_TEXT,
|
||||
PT_FLOWING_IMAGE,
|
||||
PT_HEADING_IMAGE,
|
||||
PT_PULLOUT_IMAGE,
|
||||
PT_HORZ_LINE,
|
||||
PT_VERT_LINE,
|
||||
PT_NOISE,
|
||||
PT_COUNT
|
||||
} TessPolyBlockType;
|
||||
/* C-only definition of TessOrientation. In C++ builds the same name is an
 * alias of tesseract::Orientation (see the __cplusplus branch above).
 * NOTE(review): the enumerator order presumably has to match that C++ enum -
 * confirm against the C++ header before reordering or inserting values. */
typedef enum TessOrientation {
|
||||
ORIENTATION_PAGE_UP,
|
||||
ORIENTATION_PAGE_RIGHT,
|
||||
ORIENTATION_PAGE_DOWN,
|
||||
ORIENTATION_PAGE_LEFT
|
||||
} TessOrientation;
|
||||
/* C-only definition of TessParagraphJustification. In C++ builds the same
 * name is an alias of tesseract::ParagraphJustification (see the __cplusplus
 * branch above).
 * NOTE(review): the enumerator order presumably has to match that C++ enum -
 * confirm against the C++ header before reordering or inserting values. */
typedef enum TessParagraphJustification {
|
||||
JUSTIFICATION_UNKNOWN,
|
||||
JUSTIFICATION_LEFT,
|
||||
JUSTIFICATION_CENTER,
|
||||
JUSTIFICATION_RIGHT
|
||||
} TessParagraphJustification;
|
||||
/* C-only definition of TessWritingDirection. In C++ builds the same name is
 * an alias of tesseract::WritingDirection (see the __cplusplus branch above).
 * NOTE(review): the enumerator order presumably has to match that C++ enum -
 * confirm against the C++ header before reordering or inserting values. */
typedef enum TessWritingDirection {
|
||||
WRITING_DIRECTION_LEFT_TO_RIGHT,
|
||||
WRITING_DIRECTION_RIGHT_TO_LEFT,
|
||||
WRITING_DIRECTION_TOP_TO_BOTTOM
|
||||
} TessWritingDirection;
|
||||
/* C-only definition of TessTextlineOrder. In C++ builds the same name is an
 * alias of tesseract::TextlineOrder (see the __cplusplus branch above).
 * NOTE(review): the enumerator order presumably has to match that C++ enum -
 * confirm against the C++ header before reordering or inserting values. */
typedef enum TessTextlineOrder {
|
||||
TEXTLINE_ORDER_LEFT_TO_RIGHT,
|
||||
TEXTLINE_ORDER_RIGHT_TO_LEFT,
|
||||
TEXTLINE_ORDER_TOP_TO_BOTTOM
|
||||
} TessTextlineOrder;
|
||||
typedef struct ETEXT_DESC ETEXT_DESC;
|
||||
#endif
|
||||
|
||||
typedef bool (*TessCancelFunc)(void *cancel_this, int words);
|
||||
typedef bool (*TessProgressFunc)(ETEXT_DESC *ths, int left, int right, int top,
|
||||
int bottom);
|
||||
|
||||
struct Pix;
|
||||
struct Boxa;
|
||||
struct Pixa;
|
||||
|
||||
/* General free functions */
|
||||
|
||||
TESS_API const char *TessVersion();
|
||||
TESS_API void TessDeleteText(const char *text);
|
||||
TESS_API void TessDeleteTextArray(char **arr);
|
||||
TESS_API void TessDeleteIntArray(const int *arr);
|
||||
|
||||
/* Renderer API */
|
||||
TESS_API TessResultRenderer *TessTextRendererCreate(const char *outputbase);
|
||||
TESS_API TessResultRenderer *TessHOcrRendererCreate(const char *outputbase);
|
||||
TESS_API TessResultRenderer *TessHOcrRendererCreate2(const char *outputbase,
|
||||
BOOL font_info);
|
||||
TESS_API TessResultRenderer *TessAltoRendererCreate(const char *outputbase);
|
||||
TESS_API TessResultRenderer *TessTsvRendererCreate(const char *outputbase);
|
||||
TESS_API TessResultRenderer *TessPDFRendererCreate(const char *outputbase,
|
||||
const char *datadir,
|
||||
BOOL textonly);
|
||||
TESS_API TessResultRenderer *TessUnlvRendererCreate(const char *outputbase);
|
||||
TESS_API TessResultRenderer *TessBoxTextRendererCreate(const char *outputbase);
|
||||
TESS_API TessResultRenderer *TessLSTMBoxRendererCreate(const char *outputbase);
|
||||
TESS_API TessResultRenderer *TessWordStrBoxRendererCreate(
|
||||
const char *outputbase);
|
||||
|
||||
TESS_API void TessDeleteResultRenderer(TessResultRenderer *renderer);
|
||||
TESS_API void TessResultRendererInsert(TessResultRenderer *renderer,
|
||||
TessResultRenderer *next);
|
||||
TESS_API TessResultRenderer *TessResultRendererNext(
|
||||
TessResultRenderer *renderer);
|
||||
TESS_API BOOL TessResultRendererBeginDocument(TessResultRenderer *renderer,
|
||||
const char *title);
|
||||
TESS_API BOOL TessResultRendererAddImage(TessResultRenderer *renderer,
|
||||
TessBaseAPI *api);
|
||||
TESS_API BOOL TessResultRendererEndDocument(TessResultRenderer *renderer);
|
||||
|
||||
TESS_API const char *TessResultRendererExtention(TessResultRenderer *renderer);
|
||||
TESS_API const char *TessResultRendererTitle(TessResultRenderer *renderer);
|
||||
TESS_API int TessResultRendererImageNum(TessResultRenderer *renderer);
|
||||
|
||||
/* Base API */
|
||||
|
||||
TESS_API TessBaseAPI *TessBaseAPICreate();
|
||||
TESS_API void TessBaseAPIDelete(TessBaseAPI *handle);
|
||||
|
||||
TESS_API size_t TessBaseAPIGetOpenCLDevice(TessBaseAPI *handle, void **device);
|
||||
|
||||
TESS_API void TessBaseAPISetInputName(TessBaseAPI *handle, const char *name);
|
||||
TESS_API const char *TessBaseAPIGetInputName(TessBaseAPI *handle);
|
||||
|
||||
TESS_API void TessBaseAPISetInputImage(TessBaseAPI *handle, struct Pix *pix);
|
||||
TESS_API struct Pix *TessBaseAPIGetInputImage(TessBaseAPI *handle);
|
||||
|
||||
TESS_API int TessBaseAPIGetSourceYResolution(TessBaseAPI *handle);
|
||||
TESS_API const char *TessBaseAPIGetDatapath(TessBaseAPI *handle);
|
||||
|
||||
TESS_API void TessBaseAPISetOutputName(TessBaseAPI *handle, const char *name);
|
||||
|
||||
TESS_API BOOL TessBaseAPISetVariable(TessBaseAPI *handle, const char *name,
|
||||
const char *value);
|
||||
TESS_API BOOL TessBaseAPISetDebugVariable(TessBaseAPI *handle, const char *name,
|
||||
const char *value);
|
||||
|
||||
TESS_API BOOL TessBaseAPIGetIntVariable(const TessBaseAPI *handle,
|
||||
const char *name, int *value);
|
||||
TESS_API BOOL TessBaseAPIGetBoolVariable(const TessBaseAPI *handle,
|
||||
const char *name, BOOL *value);
|
||||
TESS_API BOOL TessBaseAPIGetDoubleVariable(const TessBaseAPI *handle,
|
||||
const char *name, double *value);
|
||||
TESS_API const char *TessBaseAPIGetStringVariable(const TessBaseAPI *handle,
|
||||
const char *name);
|
||||
|
||||
TESS_API void TessBaseAPIPrintVariables(const TessBaseAPI *handle, FILE *fp);
|
||||
TESS_API BOOL TessBaseAPIPrintVariablesToFile(const TessBaseAPI *handle,
|
||||
const char *filename);
|
||||
|
||||
TESS_API int TessBaseAPIInit1(TessBaseAPI *handle, const char *datapath,
|
||||
const char *language, TessOcrEngineMode oem,
|
||||
char **configs, int configs_size);
|
||||
TESS_API int TessBaseAPIInit2(TessBaseAPI *handle, const char *datapath,
|
||||
const char *language, TessOcrEngineMode oem);
|
||||
TESS_API int TessBaseAPIInit3(TessBaseAPI *handle, const char *datapath,
|
||||
const char *language);
|
||||
|
||||
TESS_API int TessBaseAPIInit4(TessBaseAPI *handle, const char *datapath,
|
||||
const char *language, TessOcrEngineMode mode,
|
||||
char **configs, int configs_size, char **vars_vec,
|
||||
char **vars_values, size_t vars_vec_size,
|
||||
BOOL set_only_non_debug_params);
|
||||
|
||||
TESS_API int TessBaseAPIInit5(TessBaseAPI *handle, const char *data, int data_size,
|
||||
const char *language, TessOcrEngineMode mode,
|
||||
char **configs, int configs_size, char **vars_vec,
|
||||
char **vars_values, size_t vars_vec_size,
|
||||
BOOL set_only_non_debug_params);
|
||||
|
||||
TESS_API const char *TessBaseAPIGetInitLanguagesAsString(
|
||||
const TessBaseAPI *handle);
|
||||
TESS_API char **TessBaseAPIGetLoadedLanguagesAsVector(
|
||||
const TessBaseAPI *handle);
|
||||
TESS_API char **TessBaseAPIGetAvailableLanguagesAsVector(
|
||||
const TessBaseAPI *handle);
|
||||
|
||||
TESS_API void TessBaseAPIInitForAnalysePage(TessBaseAPI *handle);
|
||||
|
||||
TESS_API void TessBaseAPIReadConfigFile(TessBaseAPI *handle,
|
||||
const char *filename);
|
||||
TESS_API void TessBaseAPIReadDebugConfigFile(TessBaseAPI *handle,
|
||||
const char *filename);
|
||||
|
||||
TESS_API void TessBaseAPISetPageSegMode(TessBaseAPI *handle,
|
||||
TessPageSegMode mode);
|
||||
TESS_API TessPageSegMode TessBaseAPIGetPageSegMode(const TessBaseAPI *handle);
|
||||
|
||||
TESS_API char *TessBaseAPIRect(TessBaseAPI *handle,
|
||||
const unsigned char *imagedata,
|
||||
int bytes_per_pixel, int bytes_per_line,
|
||||
int left, int top, int width, int height);
|
||||
|
||||
TESS_API void TessBaseAPIClearAdaptiveClassifier(TessBaseAPI *handle);
|
||||
|
||||
TESS_API void TessBaseAPISetImage(TessBaseAPI *handle,
|
||||
const unsigned char *imagedata, int width,
|
||||
int height, int bytes_per_pixel,
|
||||
int bytes_per_line);
|
||||
TESS_API void TessBaseAPISetImage2(TessBaseAPI *handle, struct Pix *pix);
|
||||
|
||||
TESS_API void TessBaseAPISetSourceResolution(TessBaseAPI *handle, int ppi);
|
||||
|
||||
TESS_API void TessBaseAPISetRectangle(TessBaseAPI *handle, int left, int top,
|
||||
int width, int height);
|
||||
|
||||
TESS_API struct Pix *TessBaseAPIGetThresholdedImage(TessBaseAPI *handle);
|
||||
TESS_API struct Boxa *TessBaseAPIGetRegions(TessBaseAPI *handle,
|
||||
struct Pixa **pixa);
|
||||
TESS_API struct Boxa *TessBaseAPIGetTextlines(TessBaseAPI *handle,
|
||||
struct Pixa **pixa,
|
||||
int **blockids);
|
||||
TESS_API struct Boxa *TessBaseAPIGetTextlines1(TessBaseAPI *handle,
|
||||
BOOL raw_image, int raw_padding,
|
||||
struct Pixa **pixa,
|
||||
int **blockids, int **paraids);
|
||||
TESS_API struct Boxa *TessBaseAPIGetStrips(TessBaseAPI *handle,
|
||||
struct Pixa **pixa, int **blockids);
|
||||
TESS_API struct Boxa *TessBaseAPIGetWords(TessBaseAPI *handle,
|
||||
struct Pixa **pixa);
|
||||
TESS_API struct Boxa *TessBaseAPIGetConnectedComponents(TessBaseAPI *handle,
|
||||
struct Pixa **cc);
|
||||
TESS_API struct Boxa *TessBaseAPIGetComponentImages(TessBaseAPI *handle,
|
||||
TessPageIteratorLevel level,
|
||||
BOOL text_only,
|
||||
struct Pixa **pixa,
|
||||
int **blockids);
|
||||
TESS_API struct Boxa *TessBaseAPIGetComponentImages1(
|
||||
TessBaseAPI *handle, TessPageIteratorLevel level, BOOL text_only,
|
||||
BOOL raw_image, int raw_padding, struct Pixa **pixa, int **blockids,
|
||||
int **paraids);
|
||||
|
||||
TESS_API int TessBaseAPIGetThresholdedImageScaleFactor(
|
||||
const TessBaseAPI *handle);
|
||||
|
||||
TESS_API TessPageIterator *TessBaseAPIAnalyseLayout(TessBaseAPI *handle);
|
||||
|
||||
TESS_API int TessBaseAPIRecognize(TessBaseAPI *handle, ETEXT_DESC *monitor);
|
||||
|
||||
TESS_API BOOL TessBaseAPIProcessPages(TessBaseAPI *handle, const char *filename,
|
||||
const char *retry_config,
|
||||
int timeout_millisec,
|
||||
TessResultRenderer *renderer);
|
||||
TESS_API BOOL TessBaseAPIProcessPage(TessBaseAPI *handle, struct Pix *pix,
|
||||
int page_index, const char *filename,
|
||||
const char *retry_config,
|
||||
int timeout_millisec,
|
||||
TessResultRenderer *renderer);
|
||||
|
||||
TESS_API TessResultIterator *TessBaseAPIGetIterator(TessBaseAPI *handle);
|
||||
TESS_API TessMutableIterator *TessBaseAPIGetMutableIterator(
|
||||
TessBaseAPI *handle);
|
||||
|
||||
TESS_API char *TessBaseAPIGetUTF8Text(TessBaseAPI *handle);
|
||||
TESS_API char *TessBaseAPIGetHOCRText(TessBaseAPI *handle, int page_number);
|
||||
|
||||
TESS_API char *TessBaseAPIGetAltoText(TessBaseAPI *handle, int page_number);
|
||||
TESS_API char *TessBaseAPIGetTsvText(TessBaseAPI *handle, int page_number);
|
||||
|
||||
TESS_API char *TessBaseAPIGetBoxText(TessBaseAPI *handle, int page_number);
|
||||
TESS_API char *TessBaseAPIGetLSTMBoxText(TessBaseAPI *handle, int page_number);
|
||||
TESS_API char *TessBaseAPIGetWordStrBoxText(TessBaseAPI *handle,
|
||||
int page_number);
|
||||
|
||||
TESS_API char *TessBaseAPIGetUNLVText(TessBaseAPI *handle);
|
||||
TESS_API int TessBaseAPIMeanTextConf(TessBaseAPI *handle);
|
||||
|
||||
TESS_API int *TessBaseAPIAllWordConfidences(TessBaseAPI *handle);
|
||||
|
||||
#ifndef DISABLED_LEGACY_ENGINE
|
||||
TESS_API BOOL TessBaseAPIAdaptToWordStr(TessBaseAPI *handle,
|
||||
TessPageSegMode mode,
|
||||
const char *wordstr);
|
||||
#endif // #ifndef DISABLED_LEGACY_ENGINE
|
||||
|
||||
TESS_API void TessBaseAPIClear(TessBaseAPI *handle);
|
||||
TESS_API void TessBaseAPIEnd(TessBaseAPI *handle);
|
||||
|
||||
TESS_API int TessBaseAPIIsValidWord(TessBaseAPI *handle, const char *word);
|
||||
TESS_API BOOL TessBaseAPIGetTextDirection(TessBaseAPI *handle, int *out_offset,
|
||||
float *out_slope);
|
||||
|
||||
TESS_API const char *TessBaseAPIGetUnichar(TessBaseAPI *handle, int unichar_id);
|
||||
|
||||
TESS_API void TessBaseAPIClearPersistentCache(TessBaseAPI *handle);
|
||||
|
||||
#ifndef DISABLED_LEGACY_ENGINE
|
||||
|
||||
// Call TessDeleteText(*best_script_name) to free memory allocated by this
|
||||
// function
|
||||
TESS_API BOOL TessBaseAPIDetectOrientationScript(TessBaseAPI *handle,
|
||||
int *orient_deg,
|
||||
float *orient_conf,
|
||||
const char **script_name,
|
||||
float *script_conf);
|
||||
#endif // #ifndef DISABLED_LEGACY_ENGINE
|
||||
|
||||
TESS_API void TessBaseAPISetMinOrientationMargin(TessBaseAPI *handle,
|
||||
double margin);
|
||||
|
||||
TESS_API int TessBaseAPINumDawgs(const TessBaseAPI *handle);
|
||||
|
||||
TESS_API TessOcrEngineMode TessBaseAPIOem(const TessBaseAPI *handle);
|
||||
|
||||
TESS_API void TessBaseGetBlockTextOrientations(TessBaseAPI *handle,
|
||||
int **block_orientation,
|
||||
bool **vertical_writing);
|
||||
|
||||
/* Page iterator */
|
||||
|
||||
TESS_API void TessPageIteratorDelete(TessPageIterator *handle);
|
||||
|
||||
TESS_API TessPageIterator *TessPageIteratorCopy(const TessPageIterator *handle);
|
||||
|
||||
TESS_API void TessPageIteratorBegin(TessPageIterator *handle);
|
||||
|
||||
TESS_API BOOL TessPageIteratorNext(TessPageIterator *handle,
|
||||
TessPageIteratorLevel level);
|
||||
|
||||
TESS_API BOOL TessPageIteratorIsAtBeginningOf(const TessPageIterator *handle,
|
||||
TessPageIteratorLevel level);
|
||||
|
||||
TESS_API BOOL TessPageIteratorIsAtFinalElement(const TessPageIterator *handle,
|
||||
TessPageIteratorLevel level,
|
||||
TessPageIteratorLevel element);
|
||||
|
||||
TESS_API BOOL TessPageIteratorBoundingBox(const TessPageIterator *handle,
|
||||
TessPageIteratorLevel level,
|
||||
int *left, int *top, int *right,
|
||||
int *bottom);
|
||||
|
||||
TESS_API TessPolyBlockType
|
||||
TessPageIteratorBlockType(const TessPageIterator *handle);
|
||||
|
||||
TESS_API struct Pix *TessPageIteratorGetBinaryImage(
|
||||
const TessPageIterator *handle, TessPageIteratorLevel level);
|
||||
|
||||
TESS_API struct Pix *TessPageIteratorGetImage(const TessPageIterator *handle,
|
||||
TessPageIteratorLevel level,
|
||||
int padding,
|
||||
struct Pix *original_image,
|
||||
int *left, int *top);
|
||||
|
||||
TESS_API BOOL TessPageIteratorBaseline(const TessPageIterator *handle,
|
||||
TessPageIteratorLevel level, int *x1,
|
||||
int *y1, int *x2, int *y2);
|
||||
|
||||
TESS_API void TessPageIteratorOrientation(
|
||||
TessPageIterator *handle, TessOrientation *orientation,
|
||||
TessWritingDirection *writing_direction, TessTextlineOrder *textline_order,
|
||||
float *deskew_angle);
|
||||
|
||||
TESS_API void TessPageIteratorParagraphInfo(
|
||||
TessPageIterator *handle, TessParagraphJustification *justification,
|
||||
BOOL *is_list_item, BOOL *is_crown, int *first_line_indent);
|
||||
|
||||
/* Result iterator */
|
||||
|
||||
TESS_API void TessResultIteratorDelete(TessResultIterator *handle);
|
||||
TESS_API TessResultIterator *TessResultIteratorCopy(
|
||||
const TessResultIterator *handle);
|
||||
TESS_API TessPageIterator *TessResultIteratorGetPageIterator(
|
||||
TessResultIterator *handle);
|
||||
TESS_API const TessPageIterator *TessResultIteratorGetPageIteratorConst(
|
||||
const TessResultIterator *handle);
|
||||
TESS_API TessChoiceIterator *TessResultIteratorGetChoiceIterator(
|
||||
const TessResultIterator *handle);
|
||||
|
||||
TESS_API BOOL TessResultIteratorNext(TessResultIterator *handle,
|
||||
TessPageIteratorLevel level);
|
||||
TESS_API char *TessResultIteratorGetUTF8Text(const TessResultIterator *handle,
|
||||
TessPageIteratorLevel level);
|
||||
TESS_API float TessResultIteratorConfidence(const TessResultIterator *handle,
|
||||
TessPageIteratorLevel level);
|
||||
TESS_API const char *TessResultIteratorWordRecognitionLanguage(
|
||||
const TessResultIterator *handle);
|
||||
TESS_API const char *TessResultIteratorWordFontAttributes(
|
||||
const TessResultIterator *handle, BOOL *is_bold, BOOL *is_italic,
|
||||
BOOL *is_underlined, BOOL *is_monospace, BOOL *is_serif, BOOL *is_smallcaps,
|
||||
int *pointsize, int *font_id);
|
||||
|
||||
TESS_API BOOL
|
||||
TessResultIteratorWordIsFromDictionary(const TessResultIterator *handle);
|
||||
TESS_API BOOL TessResultIteratorWordIsNumeric(const TessResultIterator *handle);
|
||||
TESS_API BOOL
|
||||
TessResultIteratorSymbolIsSuperscript(const TessResultIterator *handle);
|
||||
TESS_API BOOL
|
||||
TessResultIteratorSymbolIsSubscript(const TessResultIterator *handle);
|
||||
TESS_API BOOL
|
||||
TessResultIteratorSymbolIsDropcap(const TessResultIterator *handle);
|
||||
|
||||
TESS_API void TessChoiceIteratorDelete(TessChoiceIterator *handle);
|
||||
TESS_API BOOL TessChoiceIteratorNext(TessChoiceIterator *handle);
|
||||
TESS_API const char *TessChoiceIteratorGetUTF8Text(
|
||||
const TessChoiceIterator *handle);
|
||||
TESS_API float TessChoiceIteratorConfidence(const TessChoiceIterator *handle);
|
||||
|
||||
/* Progress monitor */
|
||||
|
||||
TESS_API ETEXT_DESC *TessMonitorCreate();
|
||||
TESS_API void TessMonitorDelete(ETEXT_DESC *monitor);
|
||||
TESS_API void TessMonitorSetCancelFunc(ETEXT_DESC *monitor,
|
||||
TessCancelFunc cancelFunc);
|
||||
TESS_API void TessMonitorSetCancelThis(ETEXT_DESC *monitor, void *cancelThis);
|
||||
TESS_API void *TessMonitorGetCancelThis(ETEXT_DESC *monitor);
|
||||
TESS_API void TessMonitorSetProgressFunc(ETEXT_DESC *monitor,
|
||||
TessProgressFunc progressFunc);
|
||||
TESS_API int TessMonitorGetProgress(ETEXT_DESC *monitor);
|
||||
TESS_API void TessMonitorSetDeadlineMSecs(ETEXT_DESC *monitor, int deadline);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // API_CAPI_H_
|
|
@ -1,37 +0,0 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: export.h
|
||||
// Description: Place holder
|
||||
//
|
||||
// (C) Copyright 2006, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef TESSERACT_PLATFORM_H_
|
||||
#define TESSERACT_PLATFORM_H_
|
||||
|
||||
#ifndef TESS_API
|
||||
# if defined(_WIN32) || defined(__CYGWIN__)
|
||||
# if defined(TESS_EXPORTS)
|
||||
# define TESS_API __declspec(dllexport)
|
||||
# elif defined(TESS_IMPORTS)
|
||||
# define TESS_API __declspec(dllimport)
|
||||
# else
|
||||
# define TESS_API
|
||||
# endif
|
||||
# else
|
||||
# if defined(TESS_EXPORTS) || defined(TESS_IMPORTS)
|
||||
# define TESS_API __attribute__((visibility("default")))
|
||||
# else
|
||||
# define TESS_API
|
||||
# endif
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#endif // TESSERACT_PLATFORM_H_
|
|
@ -1,235 +0,0 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: ltrresultiterator.h
|
||||
// Description: Iterator for tesseract results in strict left-to-right
|
||||
// order that avoids using tesseract internal data structures.
|
||||
// Author: Ray Smith
|
||||
//
|
||||
// (C) Copyright 2010, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H_
|
||||
#define TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H_
|
||||
|
||||
#include "export.h" // for TESS_API
|
||||
#include "pageiterator.h" // for PageIterator
|
||||
#include "publictypes.h" // for PageIteratorLevel
|
||||
#include "unichar.h" // for StrongScriptDirection
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
class BLOB_CHOICE_IT;
|
||||
class PAGE_RES;
|
||||
class WERD_RES;
|
||||
|
||||
class Tesseract;
|
||||
|
||||
// Class to iterate over tesseract results, providing access to all levels
|
||||
// of the page hierarchy, without including any tesseract headers or having
|
||||
// to handle any tesseract structures.
|
||||
// WARNING! This class points to data held within the TessBaseAPI class, and
|
||||
// therefore can only be used while the TessBaseAPI class still exists and
|
||||
// has not been subjected to a call of Init, SetImage, Recognize, Clear, End,
|
||||
// DetectOS, or anything else that changes the internal PAGE_RES.
|
||||
// See tesseract/publictypes.h for the definition of PageIteratorLevel.
|
||||
// See also base class PageIterator, which contains the bulk of the interface.
|
||||
// LTRResultIterator adds text-specific methods for access to OCR output.
|
||||
|
||||
class TESS_API LTRResultIterator : public PageIterator {
|
||||
friend class ChoiceIterator;
|
||||
|
||||
public:
|
||||
// page_res and tesseract come directly from the BaseAPI.
|
||||
// The rectangle parameters are copied indirectly from the Thresholder,
|
||||
// via the BaseAPI. They represent the coordinates of some rectangle in an
|
||||
// original image (in top-left-origin coordinates) and therefore the top-left
|
||||
// needs to be added to any output boxes in order to specify coordinates
|
||||
// in the original image. See TessBaseAPI::SetRectangle.
|
||||
// The scale and scaled_yres are in case the Thresholder scaled the image
|
||||
// rectangle prior to thresholding. Any coordinates in tesseract's image
|
||||
// must be divided by scale before adding (rect_left, rect_top).
|
||||
// The scaled_yres indicates the effective resolution of the binary image
|
||||
// that tesseract has been given by the Thresholder.
|
||||
// After the constructor, Begin has already been called.
|
||||
LTRResultIterator(PAGE_RES *page_res, Tesseract *tesseract, int scale,
|
||||
int scaled_yres, int rect_left, int rect_top,
|
||||
int rect_width, int rect_height);
|
||||
|
||||
~LTRResultIterator() override;
|
||||
|
||||
// LTRResultIterators may be copied! This makes it possible to iterate over
|
||||
// all the objects at a lower level, while maintaining an iterator to
|
||||
// objects at a higher level. These constructors DO NOT CALL Begin, so
|
||||
// iterations will continue from the location of src.
|
||||
// TODO: For now the copy constructor and operator= only need the base class
|
||||
// versions, but if new data members are added, don't forget to add them!
|
||||
|
||||
// ============= Moving around within the page ============.
|
||||
|
||||
// See PageIterator.
|
||||
|
||||
// ============= Accessing data ==============.
|
||||
|
||||
// Returns the null terminated UTF-8 encoded text string for the current
|
||||
// object at the given level. Use delete [] to free after use.
|
||||
char *GetUTF8Text(PageIteratorLevel level) const;
|
||||
|
||||
// Set the string inserted at the end of each text line. "\n" by default.
|
||||
void SetLineSeparator(const char *new_line);
|
||||
|
||||
// Set the string inserted at the end of each paragraph. "\n" by default.
|
||||
void SetParagraphSeparator(const char *new_para);
|
||||
|
||||
// Returns the mean confidence of the current object at the given level.
|
||||
// The number should be interpreted as a percent probability. (0.0f-100.0f)
|
||||
float Confidence(PageIteratorLevel level) const;
|
||||
|
||||
// ============= Functions that refer to words only ============.
|
||||
|
||||
// Returns the font attributes of the current word. If iterating at a higher
|
||||
// level object than words, eg textlines, then this will return the
|
||||
// attributes of the first word in that textline.
|
||||
// The actual return value is a string representing a font name. It points
|
||||
// to an internal table and SHOULD NOT BE DELETED. Lifespan is the same as
|
||||
// the iterator itself, ie rendered invalid by various members of
|
||||
// TessBaseAPI, including Init, SetImage, End or deleting the TessBaseAPI.
|
||||
// Pointsize is returned in printers points (1/72 inch.)
|
||||
const char *WordFontAttributes(bool *is_bold, bool *is_italic,
|
||||
bool *is_underlined, bool *is_monospace,
|
||||
bool *is_serif, bool *is_smallcaps,
|
||||
int *pointsize, int *font_id) const;
|
||||
|
||||
// Return the name of the language used to recognize this word.
|
||||
// On error, nullptr. Do not delete this pointer.
|
||||
const char *WordRecognitionLanguage() const;
|
||||
|
||||
// Return the overall directionality of this word.
|
||||
StrongScriptDirection WordDirection() const;
|
||||
|
||||
// Returns true if the current word was found in a dictionary.
|
||||
bool WordIsFromDictionary() const;
|
||||
|
||||
// Returns the number of blanks before the current word.
|
||||
int BlanksBeforeWord() const;
|
||||
|
||||
// Returns true if the current word is numeric.
|
||||
bool WordIsNumeric() const;
|
||||
|
||||
// Returns true if the word contains blamer information.
|
||||
bool HasBlamerInfo() const;
|
||||
|
||||
// Returns the pointer to ParamsTrainingBundle stored in the BlamerBundle
|
||||
// of the current word.
|
||||
const void *GetParamsTrainingBundle() const;
|
||||
|
||||
// Returns a pointer to the string with blamer information for this word.
|
||||
// Assumes that the word's blamer_bundle is not nullptr.
|
||||
const char *GetBlamerDebug() const;
|
||||
|
||||
// Returns a pointer to the string with misadaption information for this word.
|
||||
// Assumes that the word's blamer_bundle is not nullptr.
|
||||
const char *GetBlamerMisadaptionDebug() const;
|
||||
|
||||
// Returns true if a truth string was recorded for the current word.
|
||||
bool HasTruthString() const;
|
||||
|
||||
// Returns true if the given string is equivalent to the truth string for
|
||||
// the current word.
|
||||
bool EquivalentToTruth(const char *str) const;
|
||||
|
||||
// Returns a null terminated UTF-8 encoded truth string for the current word.
|
||||
// Use delete [] to free after use.
|
||||
char *WordTruthUTF8Text() const;
|
||||
|
||||
// Returns a null terminated UTF-8 encoded normalized OCR string for the
|
||||
// current word. Use delete [] to free after use.
|
||||
char *WordNormedUTF8Text() const;
|
||||
|
||||
// Returns a pointer to serialized choice lattice.
|
||||
// Fills lattice_size with the number of bytes in lattice data.
|
||||
const char *WordLattice(int *lattice_size) const;
|
||||
|
||||
// ============= Functions that refer to symbols only ============.
|
||||
|
||||
// Returns true if the current symbol is a superscript.
|
||||
// If iterating at a higher level object than symbols, eg words, then
|
||||
// this will return the attributes of the first symbol in that word.
|
||||
bool SymbolIsSuperscript() const;
|
||||
// Returns true if the current symbol is a subscript.
|
||||
// If iterating at a higher level object than symbols, eg words, then
|
||||
// this will return the attributes of the first symbol in that word.
|
||||
bool SymbolIsSubscript() const;
|
||||
// Returns true if the current symbol is a dropcap.
|
||||
// If iterating at a higher level object than symbols, eg words, then
|
||||
// this will return the attributes of the first symbol in that word.
|
||||
bool SymbolIsDropcap() const;
|
||||
|
||||
protected:
|
||||
const char *line_separator_;
|
||||
const char *paragraph_separator_;
|
||||
};
|
||||
|
||||
// Class to iterate over the classifier choices for a single RIL_SYMBOL.
|
||||
class TESS_API ChoiceIterator {
|
||||
public:
|
||||
// Construction is from a LTRResultIterator that points to the symbol of
|
||||
// interest. The ChoiceIterator allows a one-shot iteration over the
|
||||
// choices for this symbol and after that it is useless.
|
||||
explicit ChoiceIterator(const LTRResultIterator &result_it);
|
||||
~ChoiceIterator();
|
||||
|
||||
// Moves to the next choice for the symbol and returns false if there
|
||||
// are none left.
|
||||
bool Next();
|
||||
|
||||
// ============= Accessing data ==============.
|
||||
|
||||
// Returns the null terminated UTF-8 encoded text string for the current
|
||||
// choice.
|
||||
// NOTE: Unlike LTRResultIterator::GetUTF8Text, the return points to an
|
||||
// internal structure and should NOT be delete[]ed to free after use.
|
||||
const char *GetUTF8Text() const;
|
||||
|
||||
// Returns the confidence of the current choice depending on the used language
|
||||
// data. If only LSTM traineddata is used the value range is 0.0f - 1.0f. All
|
||||
// choices for one symbol should roughly add up to 1.0f.
|
||||
// If only traineddata of the legacy engine is used, the number should be
|
||||
// interpreted as a percent probability. (0.0f-100.0f) In this case
|
||||
// probabilities won't add up to 100. Each one stands on its own.
|
||||
float Confidence() const;
|
||||
|
||||
// Returns a vector containing all timesteps, which belong to the currently
|
||||
// selected symbol. A timestep is a vector containing pairs of symbols and
|
||||
// floating point numbers. The number states the probability for the
|
||||
// corresponding symbol.
|
||||
std::vector<std::vector<std::pair<const char *, float>>> *Timesteps() const;
|
||||
|
||||
private:
|
||||
// clears the remaining spaces out of the results and adapts the probabilities
|
||||
void filterSpaces();
|
||||
// Pointer to the WERD_RES object owned by the API.
|
||||
WERD_RES *word_res_;
|
||||
// Iterator over the blob choices.
|
||||
BLOB_CHOICE_IT *choice_it_;
|
||||
std::vector<std::pair<const char *, float>> *LSTM_choices_ = nullptr;
|
||||
std::vector<std::pair<const char *, float>>::iterator LSTM_choice_it_;
|
||||
|
||||
const int *tstep_index_;
|
||||
// regulates the rating granularity
|
||||
double rating_coefficient_;
|
||||
// leading blanks
|
||||
int blanks_before_word_;
|
||||
// true when there is lstm engine related trained data
|
||||
bool oemLSTM_;
|
||||
};
|
||||
|
||||
} // namespace tesseract.
|
||||
|
||||
#endif // TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H_
|
|
@ -1,158 +0,0 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
/**********************************************************************
|
||||
* File: ocrclass.h
|
||||
* Description: Class definitions and constants for the OCR API.
|
||||
* Author: Hewlett-Packard Co
|
||||
*
|
||||
* (C) Copyright 1996, Hewlett-Packard Co.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
/**********************************************************************
|
||||
* This file contains typedefs for all the structures used by
|
||||
* the HP OCR interface.
|
||||
* The structures are designed to allow them to be used with any
|
||||
* structure alignment up to 8.
|
||||
**********************************************************************/
|
||||
|
||||
#ifndef CCUTIL_OCRCLASS_H_
|
||||
#define CCUTIL_OCRCLASS_H_
|
||||
|
||||
#include <chrono>
|
||||
#include <ctime>
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
/**********************************************************************
|
||||
* EANYCODE_CHAR
|
||||
* Description of a single character. The character code is defined by
|
||||
* the character set of the current font.
|
||||
* Output text is sent as an array of these structures.
|
||||
* Spaces and line endings in the output are represented in the
|
||||
* structures of the surrounding characters. They are not directly
|
||||
* represented as characters.
|
||||
* The first character in a word has a positive value of blanks.
|
||||
* Missing information should be set to the defaults in the comments.
|
||||
* If word bounds are known, but not character bounds, then the top and
|
||||
* bottom of each character should be those of the word. The left of the
|
||||
* first and right of the last char in each word should be set. All other
|
||||
* lefts and rights should be set to -1.
|
||||
* If set, the values of right and bottom are left+width and top+height.
|
||||
* Most of the members come directly from the parameters to ocr_append_char.
|
||||
* The formatting member uses the enhancement parameter and combines the
|
||||
* line direction stuff into the top 3 bits.
|
||||
* The coding is 0=RL char, 1=LR char, 2=DR NL, 3=UL NL, 4=DR Para,
|
||||
* 5=UL Para, 6=TB char, 7=BT char. API users do not need to know what
|
||||
* the coding is, only that it is backwards compatible with the previous
|
||||
* version.
|
||||
**********************************************************************/
|
||||
|
||||
struct EANYCODE_CHAR { /*single character */
|
||||
// It should be noted that the format for char_code for version 2.0 and beyond
|
||||
// is UTF8 which means that ASCII characters will come out as one structure
|
||||
// but other characters will be returned in two or more instances of this
|
||||
// structure with a single byte of the UTF8 code in each, but each will have
|
||||
// the same bounding box. Programs which want to handle languages with
|
||||
// different character sets will need to handle extended characters
|
||||
// appropriately, but *all* code needs to be prepared to receive UTF8 coded
|
||||
// characters for characters such as bullet and fancy quotes.
|
||||
uint16_t char_code; /*character itself */
|
||||
int16_t left; /*of char (-1) */
|
||||
int16_t right; /*of char (-1) */
|
||||
int16_t top; /*of char (-1) */
|
||||
int16_t bottom; /*of char (-1) */
|
||||
int16_t font_index; /*what font (0) */
|
||||
uint8_t confidence; /*0=perfect, 100=reject (0/100) */
|
||||
uint8_t point_size; /*of char, 72=1 inch, (10) */
|
||||
int8_t blanks; /*no of spaces before this char (1) */
|
||||
uint8_t formatting; /*char formatting (0) */
|
||||
};
|
||||
|
||||
/**********************************************************************
|
||||
* ETEXT_DESC
|
||||
* Description of the output of the OCR engine.
|
||||
* This structure is used as both a progress monitor and the final
|
||||
* output header, since it needs to be a valid progress monitor while
|
||||
* the OCR engine is storing its output to shared memory.
|
||||
* During progress, all the buffer info is -1.
|
||||
* Progress starts at 0 and increases to 100 during OCR. No other constraint.
|
||||
* Additionally the progress callback contains the bounding box of the word that
|
||||
* is currently being processed.
|
||||
* Every progress callback, the OCR engine must set ocr_alive to 1.
|
||||
* The HP side will set ocr_alive to 0. Repeated failure to reset
|
||||
* to 1 indicates that the OCR engine is dead.
|
||||
* If the cancel function is not null then it is called with the number of
|
||||
* user words found. If it returns true then operation is cancelled.
|
||||
**********************************************************************/
|
||||
class ETEXT_DESC;
|
||||
|
||||
using CANCEL_FUNC = bool (*)(void *, int);
|
||||
using PROGRESS_FUNC = bool (*)(int, int, int, int, int);
|
||||
using PROGRESS_FUNC2 = bool (*)(ETEXT_DESC *, int, int, int, int);
|
||||
|
||||
class ETEXT_DESC { // output header
|
||||
public:
|
||||
int16_t count{0}; /// chars in this buffer(0)
|
||||
int16_t progress{0}; /// percent complete increasing (0-100)
|
||||
/** Progress monitor covers word recognition and it does not cover layout
|
||||
* analysis.
|
||||
* See Ray comment in https://github.com/tesseract-ocr/tesseract/pull/27 */
|
||||
int8_t more_to_come{0}; /// true if not last
|
||||
volatile int8_t ocr_alive{0}; /// ocr sets to 1, HP 0
|
||||
int8_t err_code{0}; /// for errcode use
|
||||
CANCEL_FUNC cancel{nullptr}; /// returns true to cancel
|
||||
PROGRESS_FUNC progress_callback{
|
||||
nullptr}; /// called whenever progress increases
|
||||
PROGRESS_FUNC2 progress_callback2; /// monitor-aware progress callback
|
||||
void *cancel_this{nullptr}; /// this or other data for cancel
|
||||
std::chrono::steady_clock::time_point end_time;
|
||||
/// Time to stop. Expected to be set only
|
||||
/// by call to set_deadline_msecs().
|
||||
EANYCODE_CHAR text[1]{}; /// character data
|
||||
|
||||
ETEXT_DESC() : progress_callback2(&default_progress_func) {
|
||||
end_time = std::chrono::time_point<std::chrono::steady_clock,
|
||||
std::chrono::milliseconds>();
|
||||
}
|
||||
|
||||
// Sets the end time to be deadline_msecs milliseconds from now.
|
||||
void set_deadline_msecs(int32_t deadline_msecs) {
|
||||
if (deadline_msecs > 0) {
|
||||
end_time = std::chrono::steady_clock::now() +
|
||||
std::chrono::milliseconds(deadline_msecs);
|
||||
}
|
||||
}
|
||||
|
||||
// Returns false if we've not passed the end_time, or have not set a deadline.
|
||||
bool deadline_exceeded() const {
|
||||
if (end_time.time_since_epoch() ==
|
||||
std::chrono::steady_clock::duration::zero()) {
|
||||
return false;
|
||||
}
|
||||
auto now = std::chrono::steady_clock::now();
|
||||
return (now > end_time);
|
||||
}
|
||||
|
||||
private:
|
||||
static bool default_progress_func(ETEXT_DESC *ths, int left, int right,
|
||||
int top, int bottom) {
|
||||
if (ths->progress_callback != nullptr) {
|
||||
return (*(ths->progress_callback))(ths->progress, left, right, top,
|
||||
bottom);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace tesseract
|
||||
|
||||
#endif // CCUTIL_OCRCLASS_H_
|
|
@ -1,139 +0,0 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: osdetect.h
|
||||
// Description: Orientation and script detection.
|
||||
// Author: Samuel Charron
|
||||
// Ranjith Unnikrishnan
|
||||
//
|
||||
// (C) Copyright 2008, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef TESSERACT_CCMAIN_OSDETECT_H_
|
||||
#define TESSERACT_CCMAIN_OSDETECT_H_
|
||||
|
||||
#include "export.h" // for TESS_API
|
||||
|
||||
#include <vector> // for std::vector
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
class BLOBNBOX;
|
||||
class BLOBNBOX_CLIST;
|
||||
class BLOB_CHOICE_LIST;
|
||||
class TO_BLOCK_LIST;
|
||||
class UNICHARSET;
|
||||
|
||||
class Tesseract;
|
||||
|
||||
// Max number of scripts in ICU + "NULL" + Japanese and Korean + Fraktur
|
||||
const int kMaxNumberOfScripts = 116 + 1 + 2 + 1;
|
||||
|
||||
struct OSBestResult {
|
||||
OSBestResult()
|
||||
: orientation_id(0), script_id(0), sconfidence(0.0), oconfidence(0.0) {}
|
||||
int orientation_id;
|
||||
int script_id;
|
||||
float sconfidence;
|
||||
float oconfidence;
|
||||
};
|
||||
|
||||
struct OSResults {
|
||||
OSResults() : unicharset(nullptr) {
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
for (int j = 0; j < kMaxNumberOfScripts; ++j) {
|
||||
scripts_na[i][j] = 0;
|
||||
}
|
||||
orientations[i] = 0;
|
||||
}
|
||||
}
|
||||
void update_best_orientation();
|
||||
// Set the estimate of the orientation to the given id.
|
||||
void set_best_orientation(int orientation_id);
|
||||
// Update/Compute the best estimate of the script assuming the given
|
||||
// orientation id.
|
||||
void update_best_script(int orientation_id);
|
||||
// Return the index of the script with the highest score for this orientation.
|
||||
TESS_API int get_best_script(int orientation_id) const;
|
||||
// Accumulate scores with given OSResults instance and update the best script.
|
||||
void accumulate(const OSResults &osr);
|
||||
|
||||
// Print statistics.
|
||||
void print_scores(void) const;
|
||||
void print_scores(int orientation_id) const;
|
||||
|
||||
// Array holding scores for each orientation id [0,3].
|
||||
// Orientation ids [0..3] map to [0, 270, 180, 90] degree orientations of the
|
||||
// page respectively, where the values refer to the amount of clockwise
|
||||
// rotation to be applied to the page for the text to be upright and readable.
|
||||
float orientations[4];
|
||||
// Script confidence scores for each of 4 possible orientations.
|
||||
float scripts_na[4][kMaxNumberOfScripts];
|
||||
|
||||
UNICHARSET *unicharset;
|
||||
OSBestResult best_result;
|
||||
};
|
||||
|
||||
class OrientationDetector {
|
||||
public:
|
||||
OrientationDetector(const std::vector<int> *allowed_scripts,
|
||||
OSResults *results);
|
||||
bool detect_blob(BLOB_CHOICE_LIST *scores);
|
||||
int get_orientation();
|
||||
|
||||
private:
|
||||
OSResults *osr_;
|
||||
const std::vector<int> *allowed_scripts_;
|
||||
};
|
||||
|
||||
class ScriptDetector {
|
||||
public:
|
||||
ScriptDetector(const std::vector<int> *allowed_scripts, OSResults *osr,
|
||||
tesseract::Tesseract *tess);
|
||||
void detect_blob(BLOB_CHOICE_LIST *scores);
|
||||
bool must_stop(int orientation) const;
|
||||
|
||||
private:
|
||||
OSResults *osr_;
|
||||
static const char *korean_script_;
|
||||
static const char *japanese_script_;
|
||||
static const char *fraktur_script_;
|
||||
int korean_id_;
|
||||
int japanese_id_;
|
||||
int katakana_id_;
|
||||
int hiragana_id_;
|
||||
int han_id_;
|
||||
int hangul_id_;
|
||||
int latin_id_;
|
||||
int fraktur_id_;
|
||||
tesseract::Tesseract *tess_;
|
||||
const std::vector<int> *allowed_scripts_;
|
||||
};
|
||||
|
||||
int orientation_and_script_detection(const char *filename, OSResults *,
|
||||
tesseract::Tesseract *);
|
||||
|
||||
int os_detect(TO_BLOCK_LIST *port_blocks, OSResults *osr,
|
||||
tesseract::Tesseract *tess);
|
||||
|
||||
int os_detect_blobs(const std::vector<int> *allowed_scripts,
|
||||
BLOBNBOX_CLIST *blob_list, OSResults *osr,
|
||||
tesseract::Tesseract *tess);
|
||||
|
||||
bool os_detect_blob(BLOBNBOX *bbox, OrientationDetector *o, ScriptDetector *s,
|
||||
OSResults *, tesseract::Tesseract *tess);
|
||||
|
||||
// Helper method to convert an orientation index to its value in degrees.
|
||||
// The value represents the amount of clockwise rotation in degrees that must be
|
||||
// applied for the text to be upright (readable).
|
||||
TESS_API int OrientationIdToValue(const int &id);
|
||||
|
||||
} // namespace tesseract
|
||||
|
||||
#endif // TESSERACT_CCMAIN_OSDETECT_H_
|
|
@ -1,364 +0,0 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: pageiterator.h
|
||||
// Description: Iterator for tesseract page structure that avoids using
|
||||
// tesseract internal data structures.
|
||||
// Author: Ray Smith
|
||||
//
|
||||
// (C) Copyright 2010, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef TESSERACT_CCMAIN_PAGEITERATOR_H_
|
||||
#define TESSERACT_CCMAIN_PAGEITERATOR_H_
|
||||
|
||||
#include "export.h"
|
||||
#include "publictypes.h"
|
||||
|
||||
struct Pix;
|
||||
struct Pta;
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
struct BlamerBundle;
|
||||
class C_BLOB_IT;
|
||||
class PAGE_RES;
|
||||
class PAGE_RES_IT;
|
||||
class WERD;
|
||||
|
||||
class Tesseract;
|
||||
|
||||
/**
|
||||
* Class to iterate over tesseract page structure, providing access to all
|
||||
* levels of the page hierarchy, without including any tesseract headers or
|
||||
* having to handle any tesseract structures.
|
||||
* WARNING! This class points to data held within the TessBaseAPI class, and
|
||||
* therefore can only be used while the TessBaseAPI class still exists and
|
||||
* has not been subjected to a call of Init, SetImage, Recognize, Clear, End
|
||||
* DetectOS, or anything else that changes the internal PAGE_RES.
|
||||
* See tesseract/publictypes.h for the definition of PageIteratorLevel.
|
||||
* See also ResultIterator, derived from PageIterator, which adds in the
|
||||
* ability to access OCR output with text-specific methods.
|
||||
*/
|
||||
|
||||
class TESS_API PageIterator {
|
||||
public:
|
||||
/**
|
||||
* page_res and tesseract come directly from the BaseAPI.
|
||||
* The rectangle parameters are copied indirectly from the Thresholder,
|
||||
* via the BaseAPI. They represent the coordinates of some rectangle in an
|
||||
* original image (in top-left-origin coordinates) and therefore the top-left
|
||||
* needs to be added to any output boxes in order to specify coordinates
|
||||
* in the original image. See TessBaseAPI::SetRectangle.
|
||||
* The scale and scaled_yres are in case the Thresholder scaled the image
|
||||
* rectangle prior to thresholding. Any coordinates in tesseract's image
|
||||
* must be divided by scale before adding (rect_left, rect_top).
|
||||
* The scaled_yres indicates the effective resolution of the binary image
|
||||
* that tesseract has been given by the Thresholder.
|
||||
* After the constructor, Begin has already been called.
|
||||
*/
|
||||
PageIterator(PAGE_RES *page_res, Tesseract *tesseract, int scale,
|
||||
int scaled_yres, int rect_left, int rect_top, int rect_width,
|
||||
int rect_height);
|
||||
virtual ~PageIterator();
|
||||
|
||||
/**
|
||||
* Page/ResultIterators may be copied! This makes it possible to iterate over
|
||||
* all the objects at a lower level, while maintaining an iterator to
|
||||
* objects at a higher level. These constructors DO NOT CALL Begin, so
|
||||
* iterations will continue from the location of src.
|
||||
*/
|
||||
PageIterator(const PageIterator &src);
|
||||
const PageIterator &operator=(const PageIterator &src);
|
||||
|
||||
/** Are we positioned at the same location as other? */
|
||||
bool PositionedAtSameWord(const PAGE_RES_IT *other) const;
|
||||
|
||||
// ============= Moving around within the page ============.
|
||||
|
||||
/**
|
||||
* Moves the iterator to point to the start of the page to begin an
|
||||
* iteration.
|
||||
*/
|
||||
virtual void Begin();
|
||||
|
||||
/**
|
||||
* Moves the iterator to the beginning of the paragraph.
|
||||
* This class implements this functionality by moving it to the zero indexed
|
||||
* blob of the first (leftmost) word on the first row of the paragraph.
|
||||
*/
|
||||
virtual void RestartParagraph();
|
||||
|
||||
/**
|
||||
* Return whether this iterator points anywhere in the first textline of a
|
||||
* paragraph.
|
||||
*/
|
||||
bool IsWithinFirstTextlineOfParagraph() const;
|
||||
|
||||
/**
|
||||
* Moves the iterator to the beginning of the text line.
|
||||
* This class implements this functionality by moving it to the zero indexed
|
||||
* blob of the first (leftmost) word of the row.
|
||||
*/
|
||||
virtual void RestartRow();
|
||||
|
||||
/**
|
||||
* Moves to the start of the next object at the given level in the
|
||||
* page hierarchy, and returns false if the end of the page was reached.
|
||||
* NOTE that RIL_SYMBOL will skip non-text blocks, but all other
|
||||
* PageIteratorLevel level values will visit each non-text block once.
|
||||
* Think of non text blocks as containing a single para, with a single line,
|
||||
* with a single imaginary word.
|
||||
* Calls to Next with different levels may be freely intermixed.
|
||||
* This function iterates words in right-to-left scripts correctly, if
|
||||
* the appropriate language has been loaded into Tesseract.
|
||||
*/
|
||||
virtual bool Next(PageIteratorLevel level);
|
||||
|
||||
/**
|
||||
* Returns true if the iterator is at the start of an object at the given
|
||||
* level.
|
||||
*
|
||||
* For instance, suppose an iterator it is pointed to the first symbol of the
|
||||
* first word of the third line of the second paragraph of the first block in
|
||||
* a page, then:
|
||||
* it.IsAtBeginningOf(RIL_BLOCK) = false
|
||||
* it.IsAtBeginningOf(RIL_PARA) = false
|
||||
* it.IsAtBeginningOf(RIL_TEXTLINE) = true
|
||||
* it.IsAtBeginningOf(RIL_WORD) = true
|
||||
* it.IsAtBeginningOf(RIL_SYMBOL) = true
|
||||
*/
|
||||
virtual bool IsAtBeginningOf(PageIteratorLevel level) const;
|
||||
|
||||
/**
|
||||
* Returns whether the iterator is positioned at the last element in a
|
||||
* given level. (e.g. the last word in a line, the last line in a block)
|
||||
*
|
||||
* Here's some two-paragraph example
|
||||
* text. It starts off innocuously
|
||||
* enough but quickly turns bizarre.
|
||||
* The author inserts a cornucopia
|
||||
* of words to guard against confused
|
||||
* references.
|
||||
*
|
||||
* Now take an iterator it pointed to the start of "bizarre."
|
||||
* it.IsAtFinalElement(RIL_PARA, RIL_SYMBOL) = false
|
||||
* it.IsAtFinalElement(RIL_PARA, RIL_WORD) = true
|
||||
* it.IsAtFinalElement(RIL_BLOCK, RIL_WORD) = false
|
||||
*/
|
||||
virtual bool IsAtFinalElement(PageIteratorLevel level,
|
||||
PageIteratorLevel element) const;
|
||||
|
||||
/**
|
||||
* Returns whether this iterator is positioned
|
||||
* before other: -1
|
||||
* equal to other: 0
|
||||
* after other: 1
|
||||
*/
|
||||
int Cmp(const PageIterator &other) const;
|
||||
|
||||
// ============= Accessing data ==============.
|
||||
// Coordinate system:
|
||||
// Integer coordinates are at the cracks between the pixels.
|
||||
// The top-left corner of the top-left pixel in the image is at (0,0).
|
||||
// The bottom-right corner of the bottom-right pixel in the image is at
|
||||
// (width, height).
|
||||
// Every bounding box goes from the top-left of the top-left contained
|
||||
// pixel to the bottom-right of the bottom-right contained pixel, so
|
||||
// the bounding box of the single top-left pixel in the image is:
|
||||
// (0,0)->(1,1).
|
||||
// If an image rectangle has been set in the API, then returned coordinates
|
||||
// relate to the original (full) image, rather than the rectangle.
|
||||
|
||||
/**
|
||||
* Controls what to include in a bounding box. Bounding boxes of all levels
|
||||
* between RIL_WORD and RIL_BLOCK can include or exclude potential diacritics.
|
||||
* Between layout analysis and recognition, it isn't known where all
|
||||
* diacritics belong, so this control is used to include or exclude some
|
||||
* diacritics that are above or below the main body of the word. In most cases
|
||||
* where the placement is obvious, and after recognition, it doesn't make as
|
||||
* much difference, as the diacritics will already be included in the word.
|
||||
*/
|
||||
void SetBoundingBoxComponents(bool include_upper_dots,
|
||||
bool include_lower_dots) {
|
||||
include_upper_dots_ = include_upper_dots;
|
||||
include_lower_dots_ = include_lower_dots;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the bounding rectangle of the current object at the given level.
|
||||
* See comment on coordinate system above.
|
||||
* Returns false if there is no such object at the current position.
|
||||
* The returned bounding box is guaranteed to match the size and position
|
||||
* of the image returned by GetBinaryImage, but may clip foreground pixels
|
||||
* from a grey image. The padding argument to GetImage can be used to expand
|
||||
* the image to include more foreground pixels. See GetImage below.
|
||||
*/
|
||||
bool BoundingBox(PageIteratorLevel level, int *left, int *top, int *right,
|
||||
int *bottom) const;
|
||||
bool BoundingBox(PageIteratorLevel level, int padding, int *left, int *top,
|
||||
int *right, int *bottom) const;
|
||||
/**
|
||||
* Returns the bounding rectangle of the object in a coordinate system of the
|
||||
* working image rectangle having its origin at (rect_left_, rect_top_) with
|
||||
* respect to the original image and is scaled by a factor scale_.
|
||||
*/
|
||||
bool BoundingBoxInternal(PageIteratorLevel level, int *left, int *top,
|
||||
int *right, int *bottom) const;
|
||||
|
||||
/** Returns whether there is no object of a given level. */
|
||||
bool Empty(PageIteratorLevel level) const;
|
||||
|
||||
/**
|
||||
* Returns the type of the current block.
|
||||
* See tesseract/publictypes.h for PolyBlockType.
|
||||
*/
|
||||
PolyBlockType BlockType() const;
|
||||
|
||||
/**
|
||||
* Returns the polygon outline of the current block. The returned Pta must
|
||||
* be ptaDestroy-ed after use. Note that the returned Pta lists the vertices
|
||||
* of the polygon, and the last edge is the line segment between the last
|
||||
* point and the first point. nullptr will be returned if the iterator is
|
||||
* at the end of the document or layout analysis was not used.
|
||||
*/
|
||||
Pta *BlockPolygon() const;
|
||||
|
||||
/**
|
||||
* Returns a binary image of the current object at the given level.
|
||||
* The position and size match the return from BoundingBoxInternal, and so
|
||||
* this could be upscaled with respect to the original input image.
|
||||
* Use pixDestroy to delete the image after use.
|
||||
*/
|
||||
Pix *GetBinaryImage(PageIteratorLevel level) const;
|
||||
|
||||
/**
|
||||
* Returns an image of the current object at the given level in greyscale
|
||||
* if available in the input. To guarantee a binary image use BinaryImage.
|
||||
* NOTE that in order to give the best possible image, the bounds are
|
||||
* expanded slightly over the binary connected component, by the supplied
|
||||
* padding, so the top-left position of the returned image is returned
|
||||
* in (left,top). These will most likely not match the coordinates
|
||||
* returned by BoundingBox.
|
||||
* If you do not supply an original image, you will get a binary one.
|
||||
* Use pixDestroy to delete the image after use.
|
||||
*/
|
||||
Pix *GetImage(PageIteratorLevel level, int padding, Pix *original_img,
|
||||
int *left, int *top) const;
|
||||
|
||||
/**
|
||||
* Returns the baseline of the current object at the given level.
|
||||
* The baseline is the line that passes through (x1, y1) and (x2, y2).
|
||||
* WARNING: with vertical text, baselines may be vertical!
|
||||
* Returns false if there is no baseline at the current position.
|
||||
*/
|
||||
bool Baseline(PageIteratorLevel level, int *x1, int *y1, int *x2,
|
||||
int *y2) const;
|
||||
|
||||
// Returns the attributes of the current row.
|
||||
void RowAttributes(float *row_height, float *descenders,
|
||||
float *ascenders) const;
|
||||
|
||||
/**
|
||||
* Returns orientation for the block the iterator points to.
|
||||
* orientation, writing_direction, textline_order: see publictypes.h
|
||||
* deskew_angle: after rotating the block so the text orientation is
|
||||
* upright, how many radians does one have to rotate the
|
||||
* block anti-clockwise for it to be level?
|
||||
* -Pi/4 <= deskew_angle <= Pi/4
|
||||
*/
|
||||
void Orientation(tesseract::Orientation *orientation,
|
||||
tesseract::WritingDirection *writing_direction,
|
||||
tesseract::TextlineOrder *textline_order,
|
||||
float *deskew_angle) const;
|
||||
|
||||
/**
|
||||
* Returns information about the current paragraph, if available.
|
||||
*
|
||||
* justification -
|
||||
* LEFT if ragged right, or fully justified and script is left-to-right.
|
||||
* RIGHT if ragged left, or fully justified and script is right-to-left.
|
||||
* unknown if it looks like source code or we have very few lines.
|
||||
* is_list_item -
|
||||
* true if we believe this is a member of an ordered or unordered list.
|
||||
* is_crown -
|
||||
* true if the first line of the paragraph is aligned with the other
|
||||
* lines of the paragraph even though subsequent paragraphs have first
|
||||
* line indents. This typically indicates that this is the continuation
|
||||
* of a previous paragraph or that it is the very first paragraph in
|
||||
* the chapter.
|
||||
* first_line_indent -
|
||||
* For LEFT aligned paragraphs, the first text line of paragraphs of
|
||||
* this kind are indented this many pixels from the left edge of the
|
||||
* rest of the paragraph.
|
||||
* for RIGHT aligned paragraphs, the first text line of paragraphs of
|
||||
* this kind are indented this many pixels from the right edge of the
|
||||
* rest of the paragraph.
|
||||
* NOTE 1: This value may be negative.
|
||||
* NOTE 2: if *is_crown == true, the first line of this paragraph is
|
||||
* actually flush, and first_line_indent is set to the "common"
|
||||
* first_line_indent for subsequent paragraphs in this block
|
||||
* of text.
|
||||
*/
|
||||
void ParagraphInfo(tesseract::ParagraphJustification *justification,
|
||||
bool *is_list_item, bool *is_crown,
|
||||
int *first_line_indent) const;
|
||||
|
||||
// If the current WERD_RES (it_->word()) is not nullptr, sets the BlamerBundle
|
||||
// of the current word to the given pointer (takes ownership of the pointer)
|
||||
// and returns true.
|
||||
// Can only be used when iterating on the word level.
|
||||
bool SetWordBlamerBundle(BlamerBundle *blamer_bundle);
|
||||
|
||||
protected:
|
||||
/**
|
||||
* Sets up the internal data for iterating the blobs of a new word, then
|
||||
* moves the iterator to the given offset.
|
||||
*/
|
||||
void BeginWord(int offset);
|
||||
|
||||
/** Pointer to the page_res owned by the API. */
|
||||
PAGE_RES *page_res_;
|
||||
/** Pointer to the Tesseract object owned by the API. */
|
||||
Tesseract *tesseract_;
|
||||
/**
|
||||
* The iterator to the page_res_. Owned by this ResultIterator.
|
||||
* A pointer just to avoid dragging in Tesseract includes.
|
||||
*/
|
||||
PAGE_RES_IT *it_;
|
||||
/**
|
||||
* The current input WERD being iterated. If there is an output from OCR,
|
||||
* then word_ is nullptr. Owned by the API
|
||||
*/
|
||||
WERD *word_;
|
||||
/** The length of the current word_. */
|
||||
int word_length_;
|
||||
/** The current blob index within the word. */
|
||||
int blob_index_;
|
||||
/**
|
||||
* Iterator to the blobs within the word. If nullptr, then we are iterating
|
||||
* OCR results in the box_word.
|
||||
* Owned by this ResultIterator.
|
||||
*/
|
||||
C_BLOB_IT *cblob_it_;
|
||||
/** Control over what to include in bounding boxes. */
|
||||
bool include_upper_dots_;
|
||||
bool include_lower_dots_;
|
||||
/** Parameters saved from the Thresholder. Needed to rebuild coordinates.*/
|
||||
int scale_;
|
||||
int scaled_yres_;
|
||||
int rect_left_;
|
||||
int rect_top_;
|
||||
int rect_width_;
|
||||
int rect_height_;
|
||||
};
|
||||
|
||||
} // namespace tesseract.
|
||||
|
||||
#endif // TESSERACT_CCMAIN_PAGEITERATOR_H_
|
|
@ -1,281 +0,0 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: publictypes.h
|
||||
// Description: Types used in both the API and internally
|
||||
// Author: Ray Smith
|
||||
//
|
||||
// (C) Copyright 2010, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef TESSERACT_CCSTRUCT_PUBLICTYPES_H_
|
||||
#define TESSERACT_CCSTRUCT_PUBLICTYPES_H_
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// This file contains types that are used both by the API and internally
|
||||
// to Tesseract. In order to decouple the API from Tesseract and prevent cyclic
|
||||
// dependencies, THIS FILE SHOULD NOT DEPEND ON ANY OTHER PART OF TESSERACT.
|
||||
// Restated: It is OK for low-level Tesseract files to include publictypes.h,
|
||||
// but not for the low-level tesseract code to include top-level API code.
|
||||
// This file should not use other Tesseract types, as that would drag
|
||||
// their includes into the API-level.
|
||||
|
||||
/** Printers' points per inch; the unit in which point sizes are returned. */
constexpr int kPointsPerInch{72};

/**
 * Lowest resolution regarded as believable. Also the default when no other
 * information is available, since under-estimating is safer than
 * over-estimating.
 */
constexpr int kMinCredibleResolution{70};

/** Highest resolution regarded as believable. */
constexpr int kMaxCredibleResolution{2400};

/**
 * Ratio between median blob size and likely resolution, used to estimate the
 * resolution when none is provided. Basically 1 / (usual text size in inches).
 */
constexpr int kResolutionEstimationFactor{10};
|
||||
|
||||
/**
|
||||
* Possible types for a POLY_BLOCK or ColPartition.
|
||||
* Must be kept in sync with kPBColors in polyblk.cpp and PTIs*Type functions
|
||||
* below, as well as kPolyBlockNames in layout_test.cc.
|
||||
* Used extensively by ColPartition, and POLY_BLOCK.
|
||||
*/
|
||||
enum PolyBlockType {
  PT_UNKNOWN,         // Type is not yet known. Keep as the first element.
  PT_FLOWING_TEXT,    // Text that lives inside a column.
  PT_HEADING_TEXT,    // Text that spans more than one column.
  PT_PULLOUT_TEXT,    // Text that is in a cross-column pull-out region.
  PT_EQUATION,        // Partition belonging to an equation region.
  PT_INLINE_EQUATION, // Partition has inline equation.
  PT_TABLE,           // Partition belonging to a table region.
  PT_VERTICAL_TEXT,   // Text-line runs vertically.
  PT_CAPTION_TEXT,    // Text that belongs to an image.
  PT_FLOWING_IMAGE,   // Image that lives inside a column.
  PT_HEADING_IMAGE,   // Image that spans more than one column.
  PT_PULLOUT_IMAGE,   // Image that is in a cross-column pull-out region.
  PT_HORZ_LINE,       // Horizontal Line.
  PT_VERT_LINE,       // Vertical Line.
  PT_NOISE,           // Lies outside of any column.
  PT_COUNT            // Number of types above. Keep as the last element.
};

/**
 * Returns true if the type is a separator line, either horizontal
 * (PT_HORZ_LINE) or vertical (PT_VERT_LINE).
 */
inline constexpr bool PTIsLineType(PolyBlockType type) {
  return type == PT_HORZ_LINE || type == PT_VERT_LINE;
}

/**
 * Returns true if the type is one of the image types: PT_FLOWING_IMAGE,
 * PT_HEADING_IMAGE or PT_PULLOUT_IMAGE.
 */
inline constexpr bool PTIsImageType(PolyBlockType type) {
  return type == PT_FLOWING_IMAGE || type == PT_HEADING_IMAGE ||
         type == PT_PULLOUT_IMAGE;
}

/**
 * Returns true if the type is one of the text types. Besides the *_TEXT
 * types this includes PT_TABLE and PT_INLINE_EQUATION, but not PT_EQUATION.
 */
inline constexpr bool PTIsTextType(PolyBlockType type) {
  return type == PT_FLOWING_TEXT || type == PT_HEADING_TEXT ||
         type == PT_PULLOUT_TEXT || type == PT_TABLE ||
         type == PT_VERTICAL_TEXT || type == PT_CAPTION_TEXT ||
         type == PT_INLINE_EQUATION;
}

/**
 * Returns true if the type is a pull-out (inter-column) type:
 * PT_PULLOUT_IMAGE or PT_PULLOUT_TEXT.
 */
inline constexpr bool PTIsPulloutType(PolyBlockType type) {
  return type == PT_PULLOUT_IMAGE || type == PT_PULLOUT_TEXT;
}
|
||||
|
||||
/**
|
||||
* +------------------+ Orientation Example:
|
||||
* | 1 Aaaa Aaaa Aaaa | ====================
|
||||
* | Aaa aa aaa aa | To left is a diagram of some (1) English and
|
||||
* | aaaaaa A aa aaa. | (2) Chinese text and a (3) photo credit.
|
||||
* | 2 |
|
||||
* | ####### c c C | Upright Latin characters are represented as A and a.
|
||||
* | ####### c c c | '<' represents a latin character rotated
|
||||
* | < ####### c c c | anti-clockwise 90 degrees.
|
||||
* | < ####### c c |
|
||||
* | < ####### . c | Upright Chinese characters are represented C and c.
|
||||
* | 3 ####### c |
|
||||
* +------------------+ NOTA BENE: enum values here should match goodoc.proto
|
||||
|
||||
* If you orient your head so that "up" aligns with Orientation,
|
||||
* then the characters will appear "right side up" and readable.
|
||||
*
|
||||
* In the example above, both the English and Chinese paragraphs are oriented
|
||||
* so their "up" is the top of the page (page up). The photo credit is read
|
||||
* with one's head turned leftward ("up" is to page left).
|
||||
*
|
||||
* The values of this enum match the convention of Tesseract's osdetect.h
|
||||
*/
|
||||
// Direction of "up" for a piece of text, relative to the page.
// The numeric values are fixed explicitly.
enum Orientation {
  ORIENTATION_PAGE_UP = 0,    // "up" is the top of the page.
  ORIENTATION_PAGE_RIGHT = 1, // "up" is to page right.
  ORIENTATION_PAGE_DOWN = 2,  // "up" is the bottom of the page.
  ORIENTATION_PAGE_LEFT = 3,  // "up" is to page left.
};
|
||||
|
||||
/**
|
||||
* The grapheme clusters within a line of text are laid out logically
|
||||
* in this direction, judged when looking at the text line rotated so that
|
||||
* its Orientation is "page up".
|
||||
*
|
||||
* For English text, the writing direction is left-to-right. For the
|
||||
* Chinese text in the above example, the writing direction is top-to-bottom.
|
||||
*/
|
||||
// Logical layout direction of the grapheme clusters within a text line.
// The numeric values are fixed explicitly.
enum WritingDirection {
  WRITING_DIRECTION_LEFT_TO_RIGHT = 0,
  WRITING_DIRECTION_RIGHT_TO_LEFT = 1,
  WRITING_DIRECTION_TOP_TO_BOTTOM = 2,
};
|
||||
|
||||
/**
|
||||
* The text lines are read in the given sequence.
|
||||
*
|
||||
* In English, the order is top-to-bottom.
|
||||
* In Chinese, vertical text lines are read right-to-left. Mongolian is
|
||||
* written in vertical columns top to bottom like Chinese, but the lines
|
||||
* are ordered left-to-right.
|
||||
*
|
||||
* Note that only some combinations make sense. For example,
|
||||
* WRITING_DIRECTION_LEFT_TO_RIGHT implies TEXTLINE_ORDER_TOP_TO_BOTTOM
|
||||
*/
|
||||
// Sequence in which the text lines are read.
// The numeric values are fixed explicitly.
enum TextlineOrder {
  TEXTLINE_ORDER_LEFT_TO_RIGHT = 0,
  TEXTLINE_ORDER_RIGHT_TO_LEFT = 1,
  TEXTLINE_ORDER_TOP_TO_BOTTOM = 2,
};
|
||||
|
||||
/**
|
||||
* Possible modes for page layout analysis. These *must* be kept in order
|
||||
* of decreasing amount of layout analysis to be done, except for OSD_ONLY,
|
||||
* so that the inequality test functions below work.
|
||||
*/
|
||||
enum PageSegMode {
  PSM_OSD_ONLY = 0,      ///< Orientation and script detection only.
  PSM_AUTO_OSD = 1,      ///< Automatic page segmentation with orientation and
                         ///< script detection. (OSD)
  PSM_AUTO_ONLY = 2,     ///< Automatic page segmentation, but no OSD, or OCR.
  PSM_AUTO = 3,          ///< Fully automatic page segmentation, but no OSD.
  PSM_SINGLE_COLUMN = 4, ///< Assume a single column of text of variable sizes.
  PSM_SINGLE_BLOCK_VERT_TEXT = 5, ///< Assume a single uniform block of
                                  ///< vertically aligned text.
  PSM_SINGLE_BLOCK = 6, ///< Assume a single uniform block of text. (Default.)
  PSM_SINGLE_LINE = 7,  ///< Treat the image as a single text line.
  PSM_SINGLE_WORD = 8,  ///< Treat the image as a single word.
  PSM_CIRCLE_WORD = 9,  ///< Treat the image as a single word in a circle.
  PSM_SINGLE_CHAR = 10, ///< Treat the image as a single character.
  PSM_SPARSE_TEXT =
      11, ///< Find as much text as possible in no particular order.
  PSM_SPARSE_TEXT_OSD = 12, ///< Sparse text with orientation and script det.
  PSM_RAW_LINE = 13, ///< Treat the image as a single text line, bypassing
                     ///< hacks that are Tesseract-specific.

  PSM_COUNT ///< Number of enum entries.
};

/**
 * Inline functions that act on a PageSegMode to determine whether components
 * of layout analysis are enabled.
 * *Depend critically on the order of elements of PageSegMode.*
 * NOTE that arg is an int for compatibility with INT_PARAM.
 * All of them are constexpr, so they can also be used in constant expressions.
 */

/**
 * True for PSM_OSD_ONLY, PSM_AUTO_OSD and PSM_SPARSE_TEXT_OSD.
 * There is no lower bound check, so values below PSM_OSD_ONLY also yield true.
 */
inline constexpr bool PSM_OSD_ENABLED(int pageseg_mode) {
  return pageseg_mode <= PSM_AUTO_OSD || pageseg_mode == PSM_SPARSE_TEXT_OSD;
}

/**
 * True for PSM_OSD_ONLY through PSM_AUTO and for PSM_SPARSE_TEXT_OSD.
 * There is no lower bound check, so values below PSM_OSD_ONLY also yield true.
 */
inline constexpr bool PSM_ORIENTATION_ENABLED(int pageseg_mode) {
  return pageseg_mode <= PSM_AUTO || pageseg_mode == PSM_SPARSE_TEXT_OSD;
}

/** True for PSM_AUTO_OSD through PSM_AUTO. */
inline constexpr bool PSM_COL_FIND_ENABLED(int pageseg_mode) {
  return pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_AUTO;
}

/** True for the sparse modes PSM_SPARSE_TEXT and PSM_SPARSE_TEXT_OSD. */
inline constexpr bool PSM_SPARSE(int pageseg_mode) {
  return pageseg_mode == PSM_SPARSE_TEXT || pageseg_mode == PSM_SPARSE_TEXT_OSD;
}

/** True for PSM_AUTO_OSD through PSM_SINGLE_COLUMN. */
inline constexpr bool PSM_BLOCK_FIND_ENABLED(int pageseg_mode) {
  return pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_SINGLE_COLUMN;
}

/** True for PSM_AUTO_OSD through PSM_SINGLE_BLOCK. */
inline constexpr bool PSM_LINE_FIND_ENABLED(int pageseg_mode) {
  return pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_SINGLE_BLOCK;
}

/**
 * True for PSM_AUTO_OSD through PSM_SINGLE_LINE and for the sparse modes
 * PSM_SPARSE_TEXT and PSM_SPARSE_TEXT_OSD.
 */
inline constexpr bool PSM_WORD_FIND_ENABLED(int pageseg_mode) {
  return (pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_SINGLE_LINE) ||
         pageseg_mode == PSM_SPARSE_TEXT || pageseg_mode == PSM_SPARSE_TEXT_OSD;
}
|
||||
|
||||
/**
|
||||
* enum of the elements of the page hierarchy, used in ResultIterator
|
||||
* to provide functions that operate on each level without having to
|
||||
* have 5x as many functions.
|
||||
*/
|
||||
// Levels of the page hierarchy, from the coarsest (block) to the finest
// (symbol). Values are consecutive starting at 0.
enum PageIteratorLevel {
  RIL_BLOCK = 0,    // Block of text/image/separator line.
  RIL_PARA = 1,     // Paragraph within a block.
  RIL_TEXTLINE = 2, // Line within a paragraph.
  RIL_WORD = 3,     // Word within a textline.
  RIL_SYMBOL = 4    // Symbol/character within a word.
};
|
||||
|
||||
/**
|
||||
* JUSTIFICATION_UNKNOWN
|
||||
* The alignment is not clearly one of the other options. This could happen
|
||||
* for example if there are only one or two lines of text or the text looks
|
||||
* like source code or poetry.
|
||||
*
|
||||
* NOTA BENE: Fully justified paragraphs (text aligned to both left and right
|
||||
* margins) are marked by Tesseract with JUSTIFICATION_LEFT if their text
|
||||
* is written with a left-to-right script and with JUSTIFICATION_RIGHT if
|
||||
* their text is written in a right-to-left script.
|
||||
*
|
||||
* Interpretation for text read in vertical lines:
|
||||
* "Left" is wherever the starting reading position is.
|
||||
*
|
||||
* JUSTIFICATION_LEFT
|
||||
* Each line, except possibly the first, is flush to the same left tab stop.
|
||||
*
|
||||
* JUSTIFICATION_CENTER
|
||||
* The text lines of the paragraph are centered about a line going
|
||||
* down through their middle of the text lines.
|
||||
*
|
||||
* JUSTIFICATION_RIGHT
|
||||
* Each line, except possibly the first, is flush to the same right tab stop.
|
||||
*/
|
||||
// Alignment of the text lines of a paragraph.
// Values are consecutive starting at 0.
enum ParagraphJustification {
  JUSTIFICATION_UNKNOWN = 0, // Not clearly one of the other options.
  JUSTIFICATION_LEFT = 1,    // Lines flush to the same left tab stop.
  JUSTIFICATION_CENTER = 2,  // Lines centered about a common middle line.
  JUSTIFICATION_RIGHT = 3,   // Lines flush to the same right tab stop.
};
|
||||
|
||||
/**
|
||||
* When Tesseract/Cube is initialized we can choose to instantiate/load/run
|
||||
* only the Tesseract part, only the Cube part or both along with the combiner.
|
||||
* The preference of which engine to use is stored in tessedit_ocr_engine_mode.
|
||||
*
|
||||
* ATTENTION: When modifying this enum, please make sure to make the
|
||||
* appropriate changes to all the enums mirroring it (e.g. OCREngine in
|
||||
* cityblock/workflow/detection/detection_storage.proto). Such enums will
|
||||
* mention the connection to OcrEngineMode in the comments.
|
||||
*/
|
||||
// Selects which recognition engine(s) to run.
// Values are consecutive starting at 0; OEM_COUNT must stay last.
enum OcrEngineMode {
  // Run Tesseract only - fastest; deprecated.
  OEM_TESSERACT_ONLY = 0,
  // Run just the LSTM line recognizer.
  OEM_LSTM_ONLY = 1,
  // Run the LSTM recognizer, but allow fallback to Tesseract when things get
  // difficult; deprecated.
  OEM_TESSERACT_LSTM_COMBINED = 2,
  // Specify this mode when calling init_*(), to indicate that any of the
  // above modes should be automatically inferred from the variables in the
  // language-specific config, command-line configs, or if not specified in
  // any of the above should be set to the default OEM_TESSERACT_ONLY.
  OEM_DEFAULT = 3,
  // Number of OEMs.
  OEM_COUNT = 4
};
|
||||
|
||||
} // namespace tesseract.
|
||||
|
||||
#endif // TESSERACT_CCSTRUCT_PUBLICTYPES_H_
|
|
@ -1,311 +0,0 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: renderer.h
|
||||
// Description: Rendering interface to inject into TessBaseAPI
|
||||
//
|
||||
// (C) Copyright 2011, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef TESSERACT_API_RENDERER_H_
|
||||
#define TESSERACT_API_RENDERER_H_
|
||||
|
||||
#include "export.h"
|
||||
|
||||
// To avoid collision with other typenames include the ABSOLUTE MINIMUM
|
||||
// complexity of includes here. Use forward declarations wherever possible
|
||||
// and hide includes of complex types in baseapi.cpp.
|
||||
#include <cstdint>
|
||||
#include <string> // for std::string
|
||||
#include <vector> // for std::vector
|
||||
|
||||
struct Pix;
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
class TessBaseAPI;
|
||||
|
||||
/**
|
||||
* Interface for rendering tesseract results into a document, such as text,
|
||||
* HOCR or pdf. This class is abstract. Specific classes handle individual
|
||||
* formats. This interface is then used to inject the renderer class into
|
||||
* tesseract when processing images.
|
||||
*
|
||||
* For simplicity implementing this with tesseract version 3.01,
|
||||
* the renderer contains document state that is cleared from document
|
||||
* to document just as the TessBaseAPI is. This way the base API can just
|
||||
* delegate its rendering functionality to injected renderers, and the
|
||||
* renderers can manage the associated state needed for the specific formats
|
||||
* in addition to the heuristics for producing it.
|
||||
*/
|
||||
class TESS_API TessResultRenderer {
|
||||
public:
|
||||
virtual ~TessResultRenderer();
|
||||
|
||||
// Takes ownership of pointer so must be new'd instance.
|
||||
// Renderers aren't ordered, but appends the sequences of next parameter
|
||||
// and existing next(). The renderers should be unique across both lists.
|
||||
void insert(TessResultRenderer *next);
|
||||
|
||||
// Returns the next renderer or nullptr.
|
||||
TessResultRenderer *next() {
|
||||
return next_;
|
||||
}
|
||||
|
||||
/**
|
||||
* Starts a new document with the given title.
|
||||
* This clears the contents of the output data.
|
||||
* Title should use UTF-8 encoding.
|
||||
*/
|
||||
bool BeginDocument(const char *title);
|
||||
|
||||
/**
|
||||
* Adds the recognized text from the source image to the current document.
|
||||
* Invalid if BeginDocument not yet called.
|
||||
*
|
||||
* Note that this API is a bit weird but is designed to fit into the
|
||||
* current TessBaseAPI implementation where the api has lots of state
|
||||
* information that we might want to add in.
|
||||
*/
|
||||
bool AddImage(TessBaseAPI *api);
|
||||
|
||||
/**
|
||||
* Finishes the document and finalizes the output data
|
||||
* Invalid if BeginDocument not yet called.
|
||||
*/
|
||||
bool EndDocument();
|
||||
|
||||
const char *file_extension() const {
|
||||
return file_extension_;
|
||||
}
|
||||
const char *title() const {
|
||||
return title_.c_str();
|
||||
}
|
||||
|
||||
// Is everything fine? Otherwise something went wrong.
|
||||
bool happy() const {
|
||||
return happy_;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the index of the last image given to AddImage
|
||||
* (i.e. images are incremented whether the image succeeded or not)
|
||||
*
|
||||
* This is always defined. It means either the number of the
|
||||
* current image, the last image ended, or in the completed document
|
||||
* depending on when in the document lifecycle you are looking at it.
|
||||
* Will return -1 if a document was never started.
|
||||
*/
|
||||
int imagenum() const {
|
||||
return imagenum_;
|
||||
}
|
||||
|
||||
protected:
|
||||
/**
|
||||
* Called by concrete classes.
|
||||
*
|
||||
* outputbase is the name of the output file excluding
|
||||
* extension. For example, "/path/to/chocolate-chip-cookie-recipe"
|
||||
*
|
||||
* extension indicates the file extension to be used for output
|
||||
* files. For example "pdf" will produce a .pdf file, and "hocr"
|
||||
* will produce .hocr files.
|
||||
*/
|
||||
TessResultRenderer(const char *outputbase, const char *extension);
|
||||
|
||||
// Hook for specialized handling in BeginDocument()
|
||||
virtual bool BeginDocumentHandler();
|
||||
|
||||
// This must be overridden to render the OCR'd results
|
||||
virtual bool AddImageHandler(TessBaseAPI *api) = 0;
|
||||
|
||||
// Hook for specialized handling in EndDocument()
|
||||
virtual bool EndDocumentHandler();
|
||||
|
||||
// Renderers can call this to append '\0' terminated strings into
|
||||
// the output string returned by GetOutput.
|
||||
// This method will grow the output buffer if needed.
|
||||
void AppendString(const char *s);
|
||||
|
||||
// Renderers can call this to append binary byte sequences into
|
||||
// the output string returned by GetOutput. Note that s is not necessarily
|
||||
// '\0' terminated (and can contain '\0' within it).
|
||||
// This method will grow the output buffer if needed.
|
||||
void AppendData(const char *s, int len);
|
||||
|
||||
private:
|
||||
TessResultRenderer *next_; // Can link multiple renderers together
|
||||
FILE *fout_; // output file pointer
|
||||
const char *file_extension_; // standard extension for generated output
|
||||
std::string title_; // title of document being rendered
|
||||
int imagenum_; // index of last image added
|
||||
bool happy_; // I get grumpy when the disk fills up, etc.
|
||||
};
|
||||
|
||||
/**
|
||||
* Renders tesseract output into a plain UTF-8 text string
|
||||
*/
|
||||
class TESS_API TessTextRenderer : public TessResultRenderer {
|
||||
public:
|
||||
explicit TessTextRenderer(const char *outputbase);
|
||||
|
||||
protected:
|
||||
bool AddImageHandler(TessBaseAPI *api) override;
|
||||
};
|
||||
|
||||
/**
|
||||
* Renders tesseract output into an hocr text string
|
||||
*/
|
||||
class TESS_API TessHOcrRenderer : public TessResultRenderer {
|
||||
public:
|
||||
explicit TessHOcrRenderer(const char *outputbase, bool font_info);
|
||||
explicit TessHOcrRenderer(const char *outputbase);
|
||||
|
||||
protected:
|
||||
bool BeginDocumentHandler() override;
|
||||
bool AddImageHandler(TessBaseAPI *api) override;
|
||||
bool EndDocumentHandler() override;
|
||||
|
||||
private:
|
||||
bool font_info_; // whether to print font information
|
||||
};
|
||||
|
||||
/**
|
||||
* Renders tesseract output into an alto text string
|
||||
*/
|
||||
class TESS_API TessAltoRenderer : public TessResultRenderer {
|
||||
public:
|
||||
explicit TessAltoRenderer(const char *outputbase);
|
||||
|
||||
protected:
|
||||
bool BeginDocumentHandler() override;
|
||||
bool AddImageHandler(TessBaseAPI *api) override;
|
||||
bool EndDocumentHandler() override;
|
||||
|
||||
private:
|
||||
bool begin_document;
|
||||
};
|
||||
|
||||
/**
|
||||
* Renders Tesseract output into a TSV string
|
||||
*/
|
||||
class TESS_API TessTsvRenderer : public TessResultRenderer {
|
||||
public:
|
||||
explicit TessTsvRenderer(const char *outputbase, bool font_info);
|
||||
explicit TessTsvRenderer(const char *outputbase);
|
||||
|
||||
protected:
|
||||
bool BeginDocumentHandler() override;
|
||||
bool AddImageHandler(TessBaseAPI *api) override;
|
||||
bool EndDocumentHandler() override;
|
||||
|
||||
private:
|
||||
bool font_info_; // whether to print font information
|
||||
};
|
||||
|
||||
/**
|
||||
* Renders tesseract output into searchable PDF
|
||||
*/
|
||||
class TESS_API TessPDFRenderer : public TessResultRenderer {
|
||||
public:
|
||||
// datadir is the location of the TESSDATA. We need it because
|
||||
// we load a custom PDF font from this location.
|
||||
TessPDFRenderer(const char *outputbase, const char *datadir,
|
||||
bool textonly = false);
|
||||
|
||||
protected:
|
||||
bool BeginDocumentHandler() override;
|
||||
bool AddImageHandler(TessBaseAPI *api) override;
|
||||
bool EndDocumentHandler() override;
|
||||
|
||||
private:
|
||||
// We don't want to have every image in memory at once,
|
||||
// so we store some metadata as we go along producing
|
||||
// PDFs one page at a time. At the end, that metadata is
|
||||
// used to make everything that isn't easily handled in a
|
||||
// streaming fashion.
|
||||
long int obj_; // counter for PDF objects
|
||||
std::vector<uint64_t> offsets_; // offset of every PDF object in bytes
|
||||
std::vector<long int> pages_; // object number for every /Page object
|
||||
std::string datadir_; // where to find the custom font
|
||||
bool textonly_; // skip images if set
|
||||
// Bookkeeping only. DIY = Do It Yourself.
|
||||
void AppendPDFObjectDIY(size_t objectsize);
|
||||
// Bookkeeping + emit data.
|
||||
void AppendPDFObject(const char *data);
|
||||
// Create the /Contents object for an entire page.
|
||||
char *GetPDFTextObjects(TessBaseAPI *api, double width, double height);
|
||||
// Turn an image into a PDF object. Only transcode if we have to.
|
||||
static bool imageToPDFObj(Pix *pix, const char *filename, long int objnum,
|
||||
char **pdf_object, long int *pdf_object_size,
|
||||
int jpg_quality);
|
||||
};
|
||||
|
||||
/**
|
||||
* Renders tesseract output into a plain UTF-8 text string
|
||||
*/
|
||||
class TESS_API TessUnlvRenderer : public TessResultRenderer {
|
||||
public:
|
||||
explicit TessUnlvRenderer(const char *outputbase);
|
||||
|
||||
protected:
|
||||
bool AddImageHandler(TessBaseAPI *api) override;
|
||||
};
|
||||
|
||||
/**
|
||||
* Renders tesseract output into a plain UTF-8 text string for LSTMBox
|
||||
*/
|
||||
class TESS_API TessLSTMBoxRenderer : public TessResultRenderer {
|
||||
public:
|
||||
explicit TessLSTMBoxRenderer(const char *outputbase);
|
||||
|
||||
protected:
|
||||
bool AddImageHandler(TessBaseAPI *api) override;
|
||||
};
|
||||
|
||||
/**
|
||||
* Renders tesseract output into a plain UTF-8 text string
|
||||
*/
|
||||
class TESS_API TessBoxTextRenderer : public TessResultRenderer {
|
||||
public:
|
||||
explicit TessBoxTextRenderer(const char *outputbase);
|
||||
|
||||
protected:
|
||||
bool AddImageHandler(TessBaseAPI *api) override;
|
||||
};
|
||||
|
||||
/**
|
||||
* Renders tesseract output into a plain UTF-8 text string in WordStr format
|
||||
*/
|
||||
class TESS_API TessWordStrBoxRenderer : public TessResultRenderer {
|
||||
public:
|
||||
explicit TessWordStrBoxRenderer(const char *outputbase);
|
||||
|
||||
protected:
|
||||
bool AddImageHandler(TessBaseAPI *api) override;
|
||||
};
|
||||
|
||||
#ifndef DISABLED_LEGACY_ENGINE
|
||||
|
||||
/**
|
||||
* Renders tesseract output into an osd text string
|
||||
*/
|
||||
class TESS_API TessOsdRenderer : public TessResultRenderer {
|
||||
public:
|
||||
explicit TessOsdRenderer(const char *outputbase);
|
||||
|
||||
protected:
|
||||
bool AddImageHandler(TessBaseAPI *api) override;
|
||||
};
|
||||
|
||||
#endif // ndef DISABLED_LEGACY_ENGINE
|
||||
|
||||
} // namespace tesseract.
|
||||
|
||||
#endif // TESSERACT_API_RENDERER_H_
|
|
@ -1,250 +0,0 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: resultiterator.h
|
||||
// Description: Iterator for tesseract results that is capable of
|
||||
// iterating in proper reading order over Bi Directional
|
||||
// (e.g. mixed Hebrew and English) text.
|
||||
// Author: David Eger
|
||||
//
|
||||
// (C) Copyright 2011, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef TESSERACT_CCMAIN_RESULT_ITERATOR_H_
|
||||
#define TESSERACT_CCMAIN_RESULT_ITERATOR_H_
|
||||
|
||||
#include "export.h" // for TESS_API, TESS_LOCAL
|
||||
#include "ltrresultiterator.h" // for LTRResultIterator
|
||||
#include "publictypes.h" // for PageIteratorLevel
|
||||
#include "unichar.h" // for StrongScriptDirection
|
||||
|
||||
#include <set> // for std::pair
|
||||
#include <vector> // for std::vector
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Iterator over tesseract results; overrides the LTRResultIterator
// navigation methods (Begin, Next, IsAtBeginningOf, IsAtFinalElement) and
// adds reading-order helpers for bidirectional text.
class TESS_API ResultIterator : public LTRResultIterator {
public:
  // Factory; the constructor itself is protected.
  static ResultIterator *StartOfParagraph(const LTRResultIterator &resit);

  /**
   * ResultIterator is copy constructible!
   * The default copy constructor works just fine for us.
   */
  ~ResultIterator() override = default;

  // ============= Moving around within the page ============.
  /**
   * Moves the iterator to point to the start of the page to begin
   * an iteration.
   */
  void Begin() override;

  /**
   * Moves to the start of the next object at the given level in the
   * page hierarchy in the appropriate reading order and returns false if
   * the end of the page was reached.
   * NOTE that RIL_SYMBOL will skip non-text blocks, but all other
   * PageIteratorLevel level values will visit each non-text block once.
   * Think of non text blocks as containing a single para, with a single line,
   * with a single imaginary word.
   * Calls to Next with different levels may be freely intermixed.
   * This function iterates words in right-to-left scripts correctly, if
   * the appropriate language has been loaded into Tesseract.
   */
  bool Next(PageIteratorLevel level) override;

  /**
   * IsAtBeginningOf() returns whether we're at the logical beginning of the
   * given level. (as opposed to ResultIterator's left-to-right top-to-bottom
   * order). Otherwise, this acts the same as PageIterator::IsAtBeginningOf().
   * For a full description, see pageiterator.h
   */
  bool IsAtBeginningOf(PageIteratorLevel level) const override;

  /**
   * Implement PageIterator's IsAtFinalElement correctly in a BiDi context.
   * For instance, IsAtFinalElement(RIL_PARA, RIL_WORD) returns whether we
   * point at the last word in a paragraph. See PageIterator for full comment.
   */
  bool IsAtFinalElement(PageIteratorLevel level,
                        PageIteratorLevel element) const override;

  // ============= Functions that refer to words only ============.
  // Returns the number of blanks before the current word.
  int BlanksBeforeWord() const;

  // ============= Accessing data ==============.

  /**
   * Returns the null terminated UTF-8 encoded text string for the current
   * object at the given level. Use delete [] to free after use.
   */
  virtual char *GetUTF8Text(PageIteratorLevel level) const;

  /**
   * Returns the LSTM choices for every LSTM timestep for the current word.
   */
  virtual std::vector<std::vector<std::vector<std::pair<const char *, float>>>>
      *GetRawLSTMTimesteps() const;
  virtual std::vector<std::vector<std::pair<const char *, float>>>
      *GetBestLSTMSymbolChoices() const;

  /**
   * Return whether the current paragraph's dominant reading direction
   * is left-to-right (as opposed to right-to-left).
   */
  bool ParagraphIsLtr() const;

  // ============= Exposed only for testing =============.

  /**
   * Yields the reading order as a sequence of indices and (optional)
   * meta-marks for a set of words (given left-to-right).
   * The meta marks are passed as negative values:
   *   kMinorRunStart  Start of minor direction text.
   *   kMinorRunEnd    End of minor direction text.
   *   kComplexWord    The next indexed word contains both left-to-right and
   *                   right-to-left characters and was treated as neutral.
   *
   * For example, suppose we have five words in a text line,
   * indexed [0,1,2,3,4] from the leftmost side of the text line.
   * The following are all believable reading_orders:
   *
   * Left-to-Right (in ltr paragraph):
   *     { 0, 1, 2, 3, 4 }
   * Left-to-Right (in rtl paragraph):
   *     { kMinorRunStart, 0, 1, 2, 3, 4, kMinorRunEnd }
   * Right-to-Left (in rtl paragraph):
   *     { 4, 3, 2, 1, 0 }
   * Left-to-Right except for an RTL phrase in words 2, 3 in an ltr paragraph:
   *     { 0, 1, kMinorRunStart, 3, 2, kMinorRunEnd, 4 }
   */
  static void CalculateTextlineOrder(
      bool paragraph_is_ltr,
      const std::vector<StrongScriptDirection> &word_dirs,
      std::vector<int> *reading_order);

  static const int kMinorRunStart;
  static const int kMinorRunEnd;
  static const int kComplexWord;

protected:
  /**
   * We presume the data associated with the given iterator will outlive us.
   * NB: This is protected (not public) because it does something that is
   * non-obvious: it resets to the beginning of the paragraph instead of
   * staying wherever resit might have pointed.
   */
  explicit ResultIterator(const LTRResultIterator &resit);

private:
  /**
   * Calculates the current paragraph's dominant writing direction.
   * Typically, members should use current_paragraph_is_ltr_ instead.
   */
  bool CurrentParagraphIsLtr() const;

  /**
   * Returns word indices as measured from resit->RestartRow() = index 0
   * for the reading order of words within a textline given an iterator
   * into the middle of the text line.
   * In addition to non-negative word indices, the following negative values
   * may be inserted:
   *   kMinorRunStart  Start of minor direction text.
   *   kMinorRunEnd    End of minor direction text.
   *   kComplexWord    The previous word contains both left-to-right and
   *                   right-to-left characters and was treated as neutral.
   */
  void CalculateTextlineOrder(bool paragraph_is_ltr,
                              const LTRResultIterator &resit,
                              std::vector<int> *indices) const;
  /** Same as above, but the caller's ssd gets filled in if ssd != nullptr. */
  void CalculateTextlineOrder(bool paragraph_is_ltr,
                              const LTRResultIterator &resit,
                              std::vector<StrongScriptDirection> *ssd,
                              std::vector<int> *indices) const;

  /**
   * What is the index of the current word in a strict left-to-right reading
   * of the row?
   */
  int LTRWordIndex() const;

  /**
   * Given an iterator pointing at a word, returns the logical reading order
   * of blob indices for the word.
   */
  void CalculateBlobOrder(std::vector<int> *blob_indices) const;

  /** Precondition: current_paragraph_is_ltr_ is set. */
  void MoveToLogicalStartOfTextline();

  /**
   * Precondition: current_paragraph_is_ltr_ and in_minor_direction_
   * are set.
   */
  void MoveToLogicalStartOfWord();

  /** Are we pointing at the final (reading order) symbol of the word? */
  bool IsAtFinalSymbolOfWord() const;

  /** Are we pointing at the first (reading order) symbol of the word? */
  bool IsAtFirstSymbolOfWord() const;

  /**
   * Append any extra marks that should be appended to this word when printed.
   * Mostly, these are Unicode BiDi control characters.
   */
  void AppendSuffixMarks(std::string *text) const;

  /** Appends the current word in reading order to the given buffer.*/
  void AppendUTF8WordText(std::string *text) const;

  /**
   * Appends the text of the current text line, *assuming this iterator is
   * positioned at the beginning of the text line* This function
   * updates the iterator to point to the first position past the text line.
   * Each textline is terminated in a single newline character.
   * If the textline ends a paragraph, it gets a second terminal newline.
   */
  void IterateAndAppendUTF8TextlineText(std::string *text);

  /**
   * Appends the text of the current paragraph in reading order
   * to the given buffer.
   * Each textline is terminated in a single newline character, and the
   * paragraph gets an extra newline at the end.
   */
  void AppendUTF8ParagraphText(std::string *text) const;

  /** Returns whether the bidi_debug flag is set to at least min_level. */
  bool BidiDebug(int min_level) const;

  // Cached paragraph direction; several helpers above list it as a
  // precondition.
  bool current_paragraph_is_ltr_;

  /**
   * Is the currently pointed-at character at the beginning of
   * a minor-direction run?
   */
  bool at_beginning_of_minor_run_;

  /** Is the currently pointed-at character in a minor-direction sequence? */
  bool in_minor_direction_;

  /**
   * Should detected inter-word spaces be preserved, or "compressed" to a single
   * space character (default behavior).
   */
  bool preserve_interword_spaces_;
};
|
||||
|
||||
} // namespace tesseract.
|
||||
|
||||
#endif // TESSERACT_CCMAIN_RESULT_ITERATOR_H_
|
|
@ -1,174 +0,0 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: unichar.h
|
||||
// Description: Unicode character/ligature class.
|
||||
// Author: Ray Smith
|
||||
//
|
||||
// (C) Copyright 2006, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef TESSERACT_CCUTIL_UNICHAR_H_
|
||||
#define TESSERACT_CCUTIL_UNICHAR_H_
|
||||
|
||||
#include "export.h"
|
||||
|
||||
#include <memory.h>
|
||||
#include <cstring>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Maximum number of characters that can be stored in a UNICHAR. Must be
// at least 4. Must not exceed 31 without changing the coding of length.
#define UNICHAR_LEN 30

// A UNICHAR_ID is the unique id of a unichar.
using UNICHAR_ID = int;

// A variable to indicate an invalid or uninitialized unichar id.
static const int INVALID_UNICHAR_ID = -1;
// A special unichar that corresponds to INVALID_UNICHAR_ID.
static const char INVALID_UNICHAR[] = "__INVALID_UNICHAR__";
|
||||
|
||||
// Strong directionality classification of a piece of text.
enum StrongScriptDirection {
  DIR_NEUTRAL = 0,       // Text contains only neutral characters.
  DIR_LEFT_TO_RIGHT = 1, // Text contains no Right-to-Left characters.
  DIR_RIGHT_TO_LEFT = 2, // Text contains no Left-to-Right characters.
  DIR_MIX = 3,           // Text contains a mixture of left-to-right
                         // and right-to-left characters.
};
|
||||
|
||||
// Element type of the code-point vectors taken/returned by
// UNICHAR::UTF32ToUTF8() and UNICHAR::UTF8ToUTF32().
using char32 = signed int;
|
||||
|
||||
// The UNICHAR class holds a single classification result. This may be
|
||||
// a single Unicode character (stored as between 1 and 4 utf8 bytes) or
|
||||
// multiple Unicode characters representing the NFKC expansion of a ligature
|
||||
// such as fi, ffl etc. These are also stored as utf8.
|
||||
class TESS_API UNICHAR {
|
||||
public:
|
||||
UNICHAR() {
|
||||
memset(chars, 0, UNICHAR_LEN);
|
||||
}
|
||||
|
||||
// Construct from a utf8 string. If len<0 then the string is null terminated.
|
||||
// If the string is too long to fit in the UNICHAR then it takes only what
|
||||
// will fit.
|
||||
UNICHAR(const char *utf8_str, int len);
|
||||
|
||||
// Construct from a single UCS4 character.
|
||||
explicit UNICHAR(int unicode);
|
||||
|
||||
// Default copy constructor and operator= are OK.
|
||||
|
||||
// Get the first character as UCS-4.
|
||||
int first_uni() const;
|
||||
|
||||
// Get the length of the UTF8 string.
|
||||
int utf8_len() const {
|
||||
int len = chars[UNICHAR_LEN - 1];
|
||||
return len >= 0 && len < UNICHAR_LEN ? len : UNICHAR_LEN;
|
||||
}
|
||||
|
||||
// Get a UTF8 string, but NOT nullptr terminated.
|
||||
const char *utf8() const {
|
||||
return chars;
|
||||
}
|
||||
|
||||
// Get a terminated UTF8 string: Must delete[] it after use.
|
||||
char *utf8_str() const;
|
||||
|
||||
// Get the number of bytes in the first character of the given utf8 string.
|
||||
static int utf8_step(const char *utf8_str);
|
||||
|
||||
// A class to simplify iterating over and accessing elements of a UTF8
|
||||
// string. Note that unlike the UNICHAR class, const_iterator does NOT COPY or
|
||||
// take ownership of the underlying byte array. It also does not permit
|
||||
// modification of the array (as the name suggests).
|
||||
//
|
||||
// Example:
|
||||
// for (UNICHAR::const_iterator it = UNICHAR::begin(str, str_len);
|
||||
// it != UNICHAR::end(str, len);
|
||||
// ++it) {
|
||||
// printf("UCS-4 symbol code = %d\n", *it);
|
||||
// char buf[5];
|
||||
// int char_len = it.get_utf8(buf); buf[char_len] = '\0';
|
||||
// printf("Char = %s\n", buf);
|
||||
// }
|
||||
class TESS_API const_iterator {
|
||||
using CI = const_iterator;
|
||||
|
||||
public:
|
||||
// Step to the next UTF8 character.
|
||||
// If the current position is at an illegal UTF8 character, then print an
|
||||
// error message and step by one byte. If the current position is at a
|
||||
// nullptr value, don't step past it.
|
||||
const_iterator &operator++();
|
||||
|
||||
// Return the UCS-4 value at the current position.
|
||||
// If the current position is at an illegal UTF8 value, return a single
|
||||
// space character.
|
||||
int operator*() const;
|
||||
|
||||
// Store the UTF-8 encoding of the current codepoint into buf, which must be
|
||||
// at least 4 bytes long. Return the number of bytes written.
|
||||
// If the current position is at an illegal UTF8 value, writes a single
|
||||
// space character and returns 1.
|
||||
// Note that this method does not null-terminate the buffer.
|
||||
int get_utf8(char *buf) const;
|
||||
// Returns the number of bytes of the current codepoint. Returns 1 if the
|
||||
// current position is at an illegal UTF8 value.
|
||||
int utf8_len() const;
|
||||
// Returns true if the UTF-8 encoding at the current position is legal.
|
||||
bool is_legal() const;
|
||||
|
||||
// Return the pointer into the string at the current position.
|
||||
const char *utf8_data() const {
|
||||
return it_;
|
||||
}
|
||||
|
||||
// Iterator equality operators.
|
||||
friend bool operator==(const CI &lhs, const CI &rhs) {
|
||||
return lhs.it_ == rhs.it_;
|
||||
}
|
||||
friend bool operator!=(const CI &lhs, const CI &rhs) {
|
||||
return !(lhs == rhs);
|
||||
}
|
||||
|
||||
private:
|
||||
friend class UNICHAR;
|
||||
explicit const_iterator(const char *it) : it_(it) {}
|
||||
|
||||
const char *it_; // Pointer into the string.
|
||||
};
|
||||
|
||||
// Create a start/end iterator pointing to a string. Note that these methods
|
||||
// are static and do NOT create a copy or take ownership of the underlying
|
||||
// array.
|
||||
static const_iterator begin(const char *utf8_str, int byte_length);
|
||||
static const_iterator end(const char *utf8_str, int byte_length);
|
||||
|
||||
// Converts a utf-8 string to a vector of unicodes.
|
||||
// Returns an empty vector if the input contains invalid UTF-8.
|
||||
static std::vector<char32> UTF8ToUTF32(const char *utf8_str);
|
||||
// Converts a vector of unicodes to a utf8 string.
|
||||
// Returns an empty string if the input contains an invalid unicode.
|
||||
static std::string UTF32ToUTF8(const std::vector<char32> &str32);
|
||||
|
||||
private:
|
||||
// A UTF-8 representation of 1 or more Unicode characters.
|
||||
// The last element (chars[UNICHAR_LEN - 1]) is a length if
|
||||
// its value < UNICHAR_LEN, otherwise it is a genuine character.
|
||||
char chars[UNICHAR_LEN]{};
|
||||
};
|
||||
|
||||
} // namespace tesseract
|
||||
|
||||
#endif // TESSERACT_CCUTIL_UNICHAR_H_
|
|
@ -1,34 +0,0 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: version.h
|
||||
// Description: Version information
|
||||
//
|
||||
// (C) Copyright 2018, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef TESSERACT_API_VERSION_H_
|
||||
#define TESSERACT_API_VERSION_H_
|
||||
|
||||
// clang-format off
|
||||
|
||||
// NOTE(review): the @...@ tokens look like build-system placeholders that
// have not been substituted; until they are replaced with real values,
// expanding the numeric macros in code will not compile.
#define TESSERACT_MAJOR_VERSION @GENERIC_MAJOR_VERSION@
#define TESSERACT_MINOR_VERSION @GENERIC_MINOR_VERSION@
#define TESSERACT_MICRO_VERSION @GENERIC_MICRO_VERSION@

// Packs the version into one integer: major in bits 16 and up, minor in
// bits 8-15, micro in bits 0-7.
#define TESSERACT_VERSION          \
  (TESSERACT_MAJOR_VERSION << 16 | \
   TESSERACT_MINOR_VERSION << 8 |  \
   TESSERACT_MICRO_VERSION)

#define TESSERACT_VERSION_STR "@PACKAGE_VERSION@"
|
||||
|
||||
// clang-format on
|
||||
|
||||
#endif // TESSERACT_API_VERSION_H_
|
|
@ -1,812 +0,0 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: baseapi.h
|
||||
// Description: Simple API for calling tesseract.
|
||||
// Author: Ray Smith
|
||||
//
|
||||
// (C) Copyright 2006, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef TESSERACT_API_BASEAPI_H_
|
||||
#define TESSERACT_API_BASEAPI_H_
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
# include "config_auto.h" // DISABLED_LEGACY_ENGINE
|
||||
#endif
|
||||
|
||||
#include "export.h"
|
||||
#include "pageiterator.h"
|
||||
#include "publictypes.h"
|
||||
#include "resultiterator.h"
|
||||
#include "unichar.h"
|
||||
|
||||
#include "version.h"
|
||||
|
||||
#include <cstdio>
|
||||
#include <vector> // for std::vector
|
||||
|
||||
struct Pix;
|
||||
struct Pixa;
|
||||
struct Boxa;
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
class PAGE_RES;
|
||||
class ParagraphModel;
|
||||
class BLOCK_LIST;
|
||||
class ETEXT_DESC;
|
||||
struct OSResults;
|
||||
class UNICHARSET;
|
||||
|
||||
class Dawg;
|
||||
class Dict;
|
||||
class EquationDetect;
|
||||
class PageIterator;
|
||||
class ImageThresholder;
|
||||
class LTRResultIterator;
|
||||
class ResultIterator;
|
||||
class MutableIterator;
|
||||
class TessResultRenderer;
|
||||
class Tesseract;
|
||||
|
||||
// Function to read a std::vector<char> from a whole file.
|
||||
// Returns false on failure.
|
||||
using FileReader = bool (*)(const char *filename, std::vector<char> *data);
|
||||
|
||||
using DictFunc = int (Dict::*)(void *, const UNICHARSET &, UNICHAR_ID,
|
||||
bool) const;
|
||||
using ProbabilityInContextFunc = double (Dict::*)(const char *, const char *,
|
||||
int, const char *, int);
|
||||
|
||||
/**
|
||||
* Base class for all tesseract APIs.
|
||||
* Specific classes can add ability to work on different inputs or produce
|
||||
* different outputs.
|
||||
* This class is mostly an interface layer on top of the Tesseract instance
|
||||
* class to hide the data types so that users of this class don't have to
|
||||
* include any other Tesseract headers.
|
||||
*/
|
||||
class TESS_API TessBaseAPI {
|
||||
public:
|
||||
TessBaseAPI();
|
||||
virtual ~TessBaseAPI();
|
||||
// Copy constructor and assignment operator are currently unsupported.
|
||||
TessBaseAPI(TessBaseAPI const &) = delete;
|
||||
TessBaseAPI &operator=(TessBaseAPI const &) = delete;
|
||||
|
||||
/**
|
||||
* Returns the version identifier as a static string. Do not delete.
|
||||
*/
|
||||
static const char *Version();
|
||||
|
||||
/**
|
||||
* If compiled with OpenCL AND an available OpenCL
|
||||
* device is deemed faster than serial code, then
|
||||
* "device" is populated with the cl_device_id
|
||||
* and returns sizeof(cl_device_id)
|
||||
* otherwise *device=nullptr and returns 0.
|
||||
*/
|
||||
static size_t getOpenCLDevice(void **device);
|
||||
|
||||
/**
|
||||
* Set the name of the input file. Needed for training and
|
||||
* reading a UNLV zone file, and for searchable PDF output.
|
||||
*/
|
||||
void SetInputName(const char *name);
|
||||
/**
|
||||
* These functions are required for searchable PDF output.
|
||||
* We need our hands on the input file so that we can include
|
||||
* it in the PDF without transcoding. If that is not possible,
|
||||
* we need the original image. Finally, resolution metadata
|
||||
* is stored in the PDF so we need that as well.
|
||||
*/
|
||||
const char *GetInputName();
|
||||
// Takes ownership of the input pix.
|
||||
void SetInputImage(Pix *pix);
|
||||
Pix *GetInputImage();
|
||||
int GetSourceYResolution();
|
||||
const char *GetDatapath();
|
||||
|
||||
/** Set the name of the bonus output files. Needed only for debugging. */
|
||||
void SetOutputName(const char *name);
|
||||
|
||||
/**
|
||||
* Set the value of an internal "parameter."
|
||||
* Supply the name of the parameter and the value as a string, just as
|
||||
* you would in a config file.
|
||||
* Returns false if the name lookup failed.
|
||||
* Eg SetVariable("tessedit_char_blacklist", "xyz"); to ignore x, y and z.
|
||||
* Or SetVariable("classify_bln_numeric_mode", "1"); to set numeric-only mode.
|
||||
* SetVariable may be used before Init, but settings will revert to
|
||||
* defaults on End().
|
||||
*
|
||||
* Note: Must be called after Init(). Only works for non-init variables
|
||||
* (init variables should be passed to Init()).
|
||||
*/
|
||||
bool SetVariable(const char *name, const char *value);
|
||||
bool SetDebugVariable(const char *name, const char *value);
|
||||
|
||||
/**
|
||||
* Returns true if the parameter was found among Tesseract parameters.
|
||||
* Fills in value with the value of the parameter.
|
||||
*/
|
||||
bool GetIntVariable(const char *name, int *value) const;
|
||||
bool GetBoolVariable(const char *name, bool *value) const;
|
||||
bool GetDoubleVariable(const char *name, double *value) const;
|
||||
|
||||
/**
|
||||
* Returns the pointer to the string that represents the value of the
|
||||
* parameter if it was found among Tesseract parameters.
|
||||
*/
|
||||
const char *GetStringVariable(const char *name) const;
|
||||
|
||||
#ifndef DISABLED_LEGACY_ENGINE
|
||||
|
||||
/**
|
||||
* Print Tesseract fonts table to the given file.
|
||||
*/
|
||||
void PrintFontsTable(FILE *fp) const;
|
||||
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Print Tesseract parameters to the given file.
|
||||
*/
|
||||
void PrintVariables(FILE *fp) const;
|
||||
|
||||
/**
|
||||
* Get value of named variable as a string, if it exists.
|
||||
*/
|
||||
bool GetVariableAsString(const char *name, std::string *val) const;
|
||||
|
||||
/**
|
||||
* Instances are now mostly thread-safe and totally independent,
|
||||
* but some global parameters remain. Basically it is safe to use multiple
|
||||
* TessBaseAPIs in different threads in parallel, UNLESS:
|
||||
* you use SetVariable on some of the Params in classify and textord.
|
||||
* If you do, then the effect will be to change it for all your instances.
|
||||
*
|
||||
* Start tesseract. Returns zero on success and -1 on failure.
|
||||
* NOTE that the only members that may be called before Init are those
|
||||
* listed above here in the class definition.
|
||||
*
|
||||
* The datapath must be the name of the tessdata directory.
|
||||
* The language is (usually) an ISO 639-3 string or nullptr will default to
|
||||
* eng. It is entirely safe (and eventually will be efficient too) to call
|
||||
* Init multiple times on the same instance to change language, or just
|
||||
* to reset the classifier.
|
||||
* The language may be a string of the form [~]<lang>[+[~]<lang>]* indicating
|
||||
* that multiple languages are to be loaded. Eg hin+eng will load Hindi and
|
||||
* English. Languages may specify internally that they want to be loaded
|
||||
* with one or more other languages, so the ~ sign is available to override
|
||||
* that. Eg if hin were set to load eng by default, then hin+~eng would force
|
||||
* loading only hin. The number of loaded languages is limited only by
|
||||
* memory, with the caveat that loading additional languages will impact
|
||||
* both speed and accuracy, as there is more work to do to decide on the
|
||||
* applicable language, and there is more chance of hallucinating incorrect
|
||||
* words.
|
||||
* WARNING: On changing languages, all Tesseract parameters are reset
|
||||
* back to their default values. (Which may vary between languages.)
|
||||
* If you have a rare need to set a Variable that controls
|
||||
* initialization for a second call to Init you should explicitly
|
||||
* call End() and then use SetVariable before Init. This is only a very
|
||||
* rare use case, since there are very few uses that require any parameters
|
||||
* to be set before Init.
|
||||
*
|
||||
* If set_only_non_debug_params is true, only params that do not contain
|
||||
* "debug" in the name will be set.
|
||||
*/
|
||||
int Init(const char *datapath, const char *language, OcrEngineMode mode,
|
||||
char **configs, int configs_size,
|
||||
const std::vector<std::string> *vars_vec,
|
||||
const std::vector<std::string> *vars_values,
|
||||
bool set_only_non_debug_params);
|
||||
int Init(const char *datapath, const char *language, OcrEngineMode oem) {
|
||||
return Init(datapath, language, oem, nullptr, 0, nullptr, nullptr, false);
|
||||
}
|
||||
int Init(const char *datapath, const char *language) {
|
||||
return Init(datapath, language, OEM_DEFAULT, nullptr, 0, nullptr, nullptr,
|
||||
false);
|
||||
}
|
||||
// In-memory version reads the traineddata file directly from the given
|
||||
// data[data_size] array, and/or reads data via a FileReader.
|
||||
int Init(const char *data, int data_size, const char *language,
|
||||
OcrEngineMode mode, char **configs, int configs_size,
|
||||
const std::vector<std::string> *vars_vec,
|
||||
const std::vector<std::string> *vars_values,
|
||||
bool set_only_non_debug_params, FileReader reader);
|
||||
|
||||
/**
|
||||
* Returns the languages string used in the last valid initialization.
|
||||
* If the last initialization specified "deu+hin" then that will be
|
||||
* returned. If hin loaded eng automatically as well, then that will
|
||||
* not be included in this list. To find the languages actually
|
||||
* loaded use GetLoadedLanguagesAsVector.
|
||||
* The returned string should NOT be deleted.
|
||||
*/
|
||||
const char *GetInitLanguagesAsString() const;
|
||||
|
||||
/**
|
||||
* Returns the loaded languages in the vector of std::string.
|
||||
* Includes all languages loaded by the last Init, including those loaded
|
||||
* as dependencies of other loaded languages.
|
||||
*/
|
||||
void GetLoadedLanguagesAsVector(std::vector<std::string> *langs) const;
|
||||
|
||||
/**
|
||||
* Returns the available languages in the sorted vector of std::string.
|
||||
*/
|
||||
void GetAvailableLanguagesAsVector(std::vector<std::string> *langs) const;
|
||||
|
||||
/**
|
||||
* Init only for page layout analysis. Use only for calls to SetImage and
|
||||
* AnalysePage. Calls that attempt recognition will generate an error.
|
||||
*/
|
||||
void InitForAnalysePage();
|
||||
|
||||
/**
|
||||
* Read a "config" file containing a set of param, value pairs.
|
||||
* Searches the standard places: tessdata/configs, tessdata/tessconfigs
|
||||
* and also accepts a relative or absolute path name.
|
||||
* Note: only non-init params will be set (init params are set by Init()).
|
||||
*/
|
||||
void ReadConfigFile(const char *filename);
|
||||
/** Same as above, but only set debug params from the given config file. */
|
||||
void ReadDebugConfigFile(const char *filename);
|
||||
|
||||
/**
|
||||
* Set the current page segmentation mode. Defaults to PSM_SINGLE_BLOCK.
|
||||
* The mode is stored as an IntParam so it can also be modified by
|
||||
* ReadConfigFile or SetVariable("tessedit_pageseg_mode", mode as string).
|
||||
*/
|
||||
void SetPageSegMode(PageSegMode mode);
|
||||
|
||||
/** Return the current page segmentation mode. */
|
||||
PageSegMode GetPageSegMode() const;
|
||||
|
||||
/**
|
||||
* Recognize a rectangle from an image and return the result as a string.
|
||||
* May be called many times for a single Init.
|
||||
* Currently has no error checking.
|
||||
* Greyscale of 8 and color of 24 or 32 bits per pixel may be given.
|
||||
* Palette color images will not work properly and must be converted to
|
||||
* 24 bit.
|
||||
* Binary images of 1 bit per pixel may also be given but they must be
|
||||
* byte packed with the MSB of the first byte being the first pixel, and a
|
||||
* 1 represents WHITE. For binary images set bytes_per_pixel=0.
|
||||
* The recognized text is returned as a char* which is coded
|
||||
* as UTF8 and must be freed with the delete [] operator.
|
||||
*
|
||||
* Note that TesseractRect is the simplified convenience interface.
|
||||
* For advanced uses, use SetImage, (optionally) SetRectangle, Recognize,
|
||||
* and one or more of the Get*Text functions below.
|
||||
*/
|
||||
char *TesseractRect(const unsigned char *imagedata, int bytes_per_pixel,
|
||||
int bytes_per_line, int left, int top, int width,
|
||||
int height);
|
||||
|
||||
/**
|
||||
* Call between pages or documents etc to free up memory and forget
|
||||
* adaptive data.
|
||||
*/
|
||||
void ClearAdaptiveClassifier();
|
||||
|
||||
/**
|
||||
* @defgroup AdvancedAPI Advanced API
|
||||
* The following methods break TesseractRect into pieces, so you can
|
||||
* get hold of the thresholded image, get the text in different formats,
|
||||
* get bounding boxes, confidences etc.
|
||||
*/
|
||||
/* @{ */
|
||||
|
||||
/**
|
||||
* Provide an image for Tesseract to recognize. Format is as
|
||||
* TesseractRect above. Copies the image buffer and converts to Pix.
|
||||
* SetImage clears all recognition results, and sets the rectangle to the
|
||||
* full image, so it may be followed immediately by a GetUTF8Text, and it
|
||||
* will automatically perform recognition.
|
||||
*/
|
||||
void SetImage(const unsigned char *imagedata, int width, int height,
|
||||
int bytes_per_pixel, int bytes_per_line);
|
||||
|
||||
/**
|
||||
* Provide an image for Tesseract to recognize. As with SetImage above,
|
||||
* Tesseract takes its own copy of the image, so it need not persist until
|
||||
* after Recognize.
|
||||
* Pix vs raw, which to use?
|
||||
* Use Pix where possible. Tesseract uses Pix as its internal representation
|
||||
* and it is therefore more efficient to provide a Pix directly.
|
||||
*/
|
||||
void SetImage(Pix *pix);
|
||||
|
||||
/**
|
||||
* Set the resolution of the source image in pixels per inch so font size
|
||||
* information can be calculated in results. Call this after SetImage().
|
||||
*/
|
||||
void SetSourceResolution(int ppi);
|
||||
|
||||
/**
|
||||
* Restrict recognition to a sub-rectangle of the image. Call after SetImage.
|
||||
* Each SetRectangle clears the recognition results so multiple rectangles
|
||||
* can be recognized with the same image.
|
||||
*/
|
||||
void SetRectangle(int left, int top, int width, int height);
|
||||
|
||||
/**
|
||||
* Get a copy of the internal thresholded image from Tesseract.
|
||||
* Caller takes ownership of the Pix and must pixDestroy it.
|
||||
* May be called any time after SetImage, or after TesseractRect.
|
||||
*/
|
||||
Pix *GetThresholdedImage();
|
||||
|
||||
/**
|
||||
* Get the result of page layout analysis as a leptonica-style
|
||||
* Boxa, Pixa pair, in reading order.
|
||||
* Can be called before or after Recognize.
|
||||
*/
|
||||
Boxa *GetRegions(Pixa **pixa);
|
||||
|
||||
/**
|
||||
* Get the textlines as a leptonica-style
|
||||
* Boxa, Pixa pair, in reading order.
|
||||
* Can be called before or after Recognize.
|
||||
* If raw_image is true, then extract from the original image instead of the
|
||||
* thresholded image and pad by raw_padding pixels.
|
||||
* If blockids is not nullptr, the block-id of each line is also returned as
|
||||
* an array of one element per line. delete [] after use. If paraids is not
|
||||
* nullptr, the paragraph-id of each line within its block is also returned as
|
||||
* an array of one element per line. delete [] after use.
|
||||
*/
|
||||
Boxa *GetTextlines(bool raw_image, int raw_padding, Pixa **pixa,
|
||||
int **blockids, int **paraids);
|
||||
/*
|
||||
Helper method to extract from the thresholded image. (most common usage)
|
||||
*/
|
||||
Boxa *GetTextlines(Pixa **pixa, int **blockids) {
|
||||
return GetTextlines(false, 0, pixa, blockids, nullptr);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get textlines and strips of image regions as a leptonica-style Boxa, Pixa
|
||||
* pair, in reading order. Enables downstream handling of non-rectangular
|
||||
* regions.
|
||||
* Can be called before or after Recognize.
|
||||
* If blockids is not nullptr, the block-id of each line is also returned as
|
||||
* an array of one element per line. delete [] after use.
|
||||
*/
|
||||
Boxa *GetStrips(Pixa **pixa, int **blockids);
|
||||
|
||||
/**
|
||||
* Get the words as a leptonica-style
|
||||
* Boxa, Pixa pair, in reading order.
|
||||
* Can be called before or after Recognize.
|
||||
*/
|
||||
Boxa *GetWords(Pixa **pixa);
|
||||
|
||||
/**
|
||||
* Gets the individual connected (text) components (created
|
||||
* after the page segmentation step, but before recognition)
|
||||
* as a leptonica-style Boxa, Pixa pair, in reading order.
|
||||
* Can be called before or after Recognize.
|
||||
* Note: the caller is responsible for calling boxaDestroy()
|
||||
* on the returned Boxa array and pixaDestroy() on cc array.
|
||||
*/
|
||||
Boxa *GetConnectedComponents(Pixa **cc);
|
||||
|
||||
/**
|
||||
* Get the given level kind of components (block, textline, word etc.) as a
|
||||
* leptonica-style Boxa, Pixa pair, in reading order.
|
||||
* Can be called before or after Recognize.
|
||||
* If blockids is not nullptr, the block-id of each component is also returned
|
||||
* as an array of one element per component. delete [] after use.
|
||||
* If paraids is not nullptr, the paragraph-id of each component within its
|
||||
* block is also returned as an array of one element per component. delete []
|
||||
* after use. If raw_image is true, then portions of the original image are
|
||||
* extracted instead of the thresholded image and padded with raw_padding. If
|
||||
* text_only is true, then only text components are returned.
|
||||
*/
|
||||
Boxa *GetComponentImages(PageIteratorLevel level, bool text_only,
|
||||
bool raw_image, int raw_padding, Pixa **pixa,
|
||||
int **blockids, int **paraids);
|
||||
// Helper function to get binary images with no padding (most common usage).
|
||||
Boxa *GetComponentImages(const PageIteratorLevel level, const bool text_only,
|
||||
Pixa **pixa, int **blockids) {
|
||||
return GetComponentImages(level, text_only, false, 0, pixa, blockids,
|
||||
nullptr);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the scale factor of the thresholded image that would be returned by
|
||||
* GetThresholdedImage() and the various GetX() methods that call
|
||||
* GetComponentImages().
|
||||
* Returns 0 if no thresholder has been set.
|
||||
*/
|
||||
int GetThresholdedImageScaleFactor() const;
|
||||
|
||||
/**
|
||||
* Runs page layout analysis in the mode set by SetPageSegMode.
|
||||
* May optionally be called prior to Recognize to get access to just
|
||||
* the page layout results. Returns an iterator to the results.
|
||||
* If merge_similar_words is true, words are combined where suitable for use
|
||||
* with a line recognizer. Use if you want to use AnalyseLayout to find the
|
||||
* textlines, and then want to process textline fragments with an external
|
||||
* line recognizer.
|
||||
* Returns nullptr on error or an empty page.
|
||||
* The returned iterator must be deleted after use.
|
||||
* WARNING! This class points to data held within the TessBaseAPI class, and
|
||||
* therefore can only be used while the TessBaseAPI class still exists and
|
||||
* has not been subjected to a call of Init, SetImage, Recognize, Clear, End,
|
||||
* DetectOS, or anything else that changes the internal PAGE_RES.
|
||||
*/
|
||||
PageIterator *AnalyseLayout();
|
||||
PageIterator *AnalyseLayout(bool merge_similar_words);
|
||||
|
||||
/**
|
||||
* Recognize the image from SetImage, generating Tesseract
|
||||
* internal structures. Returns 0 on success.
|
||||
* Optional. The Get*Text functions below will call Recognize if needed.
|
||||
* After Recognize, the output is kept internally until the next SetImage.
|
||||
*/
|
||||
int Recognize(ETEXT_DESC *monitor);
|
||||
|
||||
/**
|
||||
* Methods to retrieve information after SetImage(),
|
||||
* Recognize() or TesseractRect(). (Recognize is called implicitly if needed.)
|
||||
*/
|
||||
|
||||
/**
|
||||
* Turns images into symbolic text.
|
||||
*
|
||||
* filename can point to a single image, a multi-page TIFF,
|
||||
* or a plain text list of image filenames.
|
||||
*
|
||||
* retry_config is useful for debugging. If not nullptr, you can fall
|
||||
* back to an alternate configuration if a page fails for some
|
||||
* reason.
|
||||
*
|
||||
* timeout_millisec terminates processing if any single page
|
||||
* takes too long. Set to 0 for unlimited time.
|
||||
*
|
||||
* renderer is responsible for creating the output. For example,
|
||||
* use the TessTextRenderer if you want plaintext output, or
|
||||
* the TessPDFRenderer to produce searchable PDF.
|
||||
*
|
||||
* If tessedit_page_number is non-negative, will only process that
|
||||
* single page. Works for multi-page tiff file, or filelist.
|
||||
*
|
||||
* Returns true if successful, false on error.
|
||||
*/
|
||||
bool ProcessPages(const char *filename, const char *retry_config,
|
||||
int timeout_millisec, TessResultRenderer *renderer);
|
||||
// Does the real work of ProcessPages.
|
||||
bool ProcessPagesInternal(const char *filename, const char *retry_config,
|
||||
int timeout_millisec, TessResultRenderer *renderer);
|
||||
|
||||
/**
|
||||
* Turn a single image into symbolic text.
|
||||
*
|
||||
* The pix is the image processed. filename and page_index are
|
||||
* metadata used by side-effect processes, such as reading a box
|
||||
* file or formatting as hOCR.
|
||||
*
|
||||
* See ProcessPages for descriptions of other parameters.
|
||||
*/
|
||||
bool ProcessPage(Pix *pix, int page_index, const char *filename,
|
||||
const char *retry_config, int timeout_millisec,
|
||||
TessResultRenderer *renderer);
|
||||
|
||||
/**
|
||||
* Get a reading-order iterator to the results of LayoutAnalysis and/or
|
||||
* Recognize. The returned iterator must be deleted after use.
|
||||
* WARNING! This class points to data held within the TessBaseAPI class, and
|
||||
* therefore can only be used while the TessBaseAPI class still exists and
|
||||
* has not been subjected to a call of Init, SetImage, Recognize, Clear, End,
|
||||
* DetectOS, or anything else that changes the internal PAGE_RES.
|
||||
*/
|
||||
ResultIterator *GetIterator();
|
||||
|
||||
/**
|
||||
* Get a mutable iterator to the results of LayoutAnalysis and/or Recognize.
|
||||
* The returned iterator must be deleted after use.
|
||||
* WARNING! This class points to data held within the TessBaseAPI class, and
|
||||
* therefore can only be used while the TessBaseAPI class still exists and
|
||||
* has not been subjected to a call of Init, SetImage, Recognize, Clear, End,
|
||||
* DetectOS, or anything else that changes the internal PAGE_RES.
|
||||
*/
|
||||
MutableIterator *GetMutableIterator();
|
||||
|
||||
/**
|
||||
* The recognized text is returned as a char* which is coded
|
||||
* as UTF8 and must be freed with the delete [] operator.
|
||||
*/
|
||||
char *GetUTF8Text();
|
||||
|
||||
/**
|
||||
* Make a HTML-formatted string with hOCR markup from the internal
|
||||
* data structures.
|
||||
* page_number is 0-based but will appear in the output as 1-based.
|
||||
* monitor can be used to
|
||||
* cancel the recognition
|
||||
* receive progress callbacks
|
||||
* Returned string must be freed with the delete [] operator.
|
||||
*/
|
||||
char *GetHOCRText(ETEXT_DESC *monitor, int page_number);
|
||||
|
||||
/**
|
||||
* Make a HTML-formatted string with hOCR markup from the internal
|
||||
* data structures.
|
||||
* page_number is 0-based but will appear in the output as 1-based.
|
||||
* Returned string must be freed with the delete [] operator.
|
||||
*/
|
||||
char *GetHOCRText(int page_number);
|
||||
|
||||
/**
|
||||
* Make an XML-formatted string with Alto markup from the internal
|
||||
* data structures.
|
||||
*/
|
||||
char *GetAltoText(ETEXT_DESC *monitor, int page_number);
|
||||
|
||||
/**
|
||||
* Make an XML-formatted string with Alto markup from the internal
|
||||
* data structures.
|
||||
*/
|
||||
char *GetAltoText(int page_number);
|
||||
|
||||
/**
|
||||
* Make a TSV-formatted string from the internal data structures.
|
||||
* page_number is 0-based but will appear in the output as 1-based.
|
||||
* Returned string must be freed with the delete [] operator.
|
||||
*/
|
||||
char *GetTSVText(int page_number);
|
||||
|
||||
/**
|
||||
* Make a box file for LSTM training from the internal data structures.
|
||||
* Constructs coordinates in the original image - not just the rectangle.
|
||||
* page_number is a 0-based page index that will appear in the box file.
|
||||
* Returned string must be freed with the delete [] operator.
|
||||
*/
|
||||
char *GetLSTMBoxText(int page_number);
|
||||
|
||||
/**
|
||||
* The recognized text is returned as a char* which is coded in the same
|
||||
* format as a box file used in training.
|
||||
* Constructs coordinates in the original image - not just the rectangle.
|
||||
* page_number is a 0-based page index that will appear in the box file.
|
||||
* Returned string must be freed with the delete [] operator.
|
||||
*/
|
||||
char *GetBoxText(int page_number);
|
||||
|
||||
/**
|
||||
* The recognized text is returned as a char* which is coded in the same
|
||||
* format as a WordStr box file used in training.
|
||||
* page_number is a 0-based page index that will appear in the box file.
|
||||
* Returned string must be freed with the delete [] operator.
|
||||
*/
|
||||
char *GetWordStrBoxText(int page_number);
|
||||
|
||||
/**
|
||||
* The recognized text is returned as a char* which is coded
|
||||
* as UNLV format Latin-1 with specific reject and suspect codes.
|
||||
* Returned string must be freed with the delete [] operator.
|
||||
*/
|
||||
char *GetUNLVText();
|
||||
|
||||
/**
|
||||
* Detect the orientation of the input image and apparent script (alphabet).
|
||||
* orient_deg is the detected clockwise rotation of the input image in degrees
|
||||
* (0, 90, 180, 270)
|
||||
* orient_conf is the confidence (15.0 is reasonably confident)
|
||||
* script_name is an ASCII string, the name of the script, e.g. "Latin"
|
||||
* script_conf is confidence level in the script
|
||||
* Returns true on success and writes values to each parameter as an output
|
||||
*/
|
||||
bool DetectOrientationScript(int *orient_deg, float *orient_conf,
|
||||
const char **script_name, float *script_conf);
|
||||
|
||||
/**
|
||||
* The recognized text is returned as a char* which is coded
|
||||
* as UTF8 and must be freed with the delete [] operator.
|
||||
* page_number is a 0-based page index that will appear in the osd file.
|
||||
*/
|
||||
char *GetOsdText(int page_number);
|
||||
|
||||
/** Returns the (average) confidence value between 0 and 100. */
|
||||
int MeanTextConf();
|
||||
/**
|
||||
* Returns all word confidences (between 0 and 100) in an array, terminated
|
||||
* by -1. The calling function must delete [] after use.
|
||||
* The number of confidences should correspond to the number of space-
|
||||
* delimited words in GetUTF8Text.
|
||||
*/
|
||||
int *AllWordConfidences();
|
||||
|
||||
#ifndef DISABLED_LEGACY_ENGINE
|
||||
/**
|
||||
* Applies the given word to the adaptive classifier if possible.
|
||||
* The word must be SPACE-DELIMITED UTF-8 - l i k e t h i s , so it can
|
||||
* tell the boundaries of the graphemes.
|
||||
* Assumes that SetImage/SetRectangle have been used to set the image
|
||||
* to the given word. The mode arg should be PSM_SINGLE_WORD or
|
||||
* PSM_CIRCLE_WORD, as that will be used to control layout analysis.
|
||||
* The currently set PageSegMode is preserved.
|
||||
* Returns false if adaption was not possible for some reason.
|
||||
*/
|
||||
bool AdaptToWordStr(PageSegMode mode, const char *wordstr);
|
||||
#endif // ndef DISABLED_LEGACY_ENGINE
|
||||
|
||||
/**
|
||||
* Free up recognition results and any stored image data, without actually
|
||||
* freeing any recognition data that would be time-consuming to reload.
|
||||
* Afterwards, you must call SetImage or TesseractRect before doing
|
||||
* any Recognize or Get* operation.
|
||||
*/
|
||||
void Clear();
|
||||
|
||||
/**
|
||||
* Close down tesseract and free up all memory. End() is equivalent to
|
||||
* destructing and reconstructing your TessBaseAPI.
|
||||
* Once End() has been used, none of the other API functions may be used
|
||||
* other than Init and anything declared above it in the class definition.
|
||||
*/
|
||||
void End();
|
||||
|
||||
/**
|
||||
* Clear any library-level memory caches.
|
||||
* There are a variety of expensive-to-load constant data structures (mostly
|
||||
* language dictionaries) that are cached globally -- surviving the Init()
|
||||
* and End() of individual TessBaseAPI's. This function allows the clearing
|
||||
* of these caches.
|
||||
**/
|
||||
static void ClearPersistentCache();
|
||||
|
||||
/**
|
||||
* Check whether a word is valid according to Tesseract's language model
|
||||
* @return 0 if the word is invalid, non-zero if valid.
|
||||
* @warning temporary! This function will be removed from here and placed
|
||||
* in a separate API at some future time.
|
||||
*/
|
||||
int IsValidWord(const char *word) const;
|
||||
// Returns true if utf8_character is defined in the UniCharset.
|
||||
bool IsValidCharacter(const char *utf8_character) const;
|
||||
|
||||
bool GetTextDirection(int *out_offset, float *out_slope);
|
||||
|
||||
/** Sets Dict::letter_is_okay_ function to point to the given function. */
|
||||
void SetDictFunc(DictFunc f);
|
||||
|
||||
/** Sets Dict::probability_in_context_ function to point to the given
|
||||
* function.
|
||||
*/
|
||||
void SetProbabilityInContextFunc(ProbabilityInContextFunc f);
|
||||
|
||||
/**
|
||||
* Estimates the Orientation And Script of the image.
|
||||
* @return true if the image was processed successfully.
|
||||
*/
|
||||
bool DetectOS(OSResults *);
|
||||
|
||||
/**
|
||||
* Return text orientation of each block as determined by an earlier run
|
||||
* of layout analysis.
|
||||
*/
|
||||
void GetBlockTextOrientations(int **block_orientation,
|
||||
bool **vertical_writing);
|
||||
|
||||
/** This method returns the string form of the specified unichar. */
|
||||
const char *GetUnichar(int unichar_id) const;
|
||||
|
||||
/** Return the pointer to the i-th dawg loaded into tesseract_ object. */
|
||||
const Dawg *GetDawg(int i) const;
|
||||
|
||||
/** Return the number of dawgs loaded into tesseract_ object. */
|
||||
int NumDawgs() const;
|
||||
|
||||
/** Returns the stored tesseract_ pointer (the underlying data object). */
Tesseract *tesseract() const {
|
||||
return tesseract_;
|
||||
}
|
||||
|
||||
/** Returns last_oem_requested_, the engine mode most recently requested. */
OcrEngineMode oem() const {
|
||||
return last_oem_requested_;
|
||||
}
|
||||
|
||||
void set_min_orientation_margin(double margin);
|
||||
/* @} */
|
||||
|
||||
protected:
|
||||
/** Common code for setting the image. Returns true if Init has been called.
|
||||
*/
|
||||
bool InternalSetImage();
|
||||
|
||||
/**
|
||||
* Run the thresholder to make the thresholded image. If pix is not nullptr,
|
||||
* the source is thresholded to pix instead of the internal IMAGE.
|
||||
*/
|
||||
virtual bool Threshold(Pix **pix);
|
||||
|
||||
/**
|
||||
* Find lines from the image making the BLOCK_LIST.
|
||||
* @return 0 on success.
|
||||
*/
|
||||
int FindLines();
|
||||
|
||||
/** Delete the pageres and block list ready for a new page. */
|
||||
void ClearResults();
|
||||
|
||||
/**
|
||||
* Return an LTR Result Iterator -- used only for training, as we really want
|
||||
* to ignore all BiDi smarts at that point.
|
||||
* delete once you're done with it.
|
||||
*/
|
||||
LTRResultIterator *GetLTRIterator();
|
||||
|
||||
/**
|
||||
* Return the length of the output text string, as UTF8, assuming
|
||||
* one newline per line and one per block, with a terminator,
|
||||
* and assuming a single character reject marker for each rejected character.
|
||||
* Also return the number of recognized blobs in blob_count.
|
||||
*/
|
||||
int TextLength(int *blob_count) const;
|
||||
|
||||
//// paragraphs.cpp ////////////////////////////////////////////////////
|
||||
void DetectParagraphs(bool after_text_recognition);
|
||||
|
||||
/** Returns the stored page_res_ pointer (the page-level data) as pointer-to-const. */
const PAGE_RES *GetPageRes() const {
|
||||
return page_res_;
|
||||
}
|
||||
|
||||
protected:
|
||||
Tesseract *tesseract_; ///< The underlying data object.
|
||||
Tesseract *osd_tesseract_; ///< For orientation & script detection.
|
||||
EquationDetect *equ_detect_; ///< The equation detector.
|
||||
FileReader reader_; ///< Reads files from any filesystem.
|
||||
ImageThresholder *thresholder_; ///< Image thresholding module.
|
||||
std::vector<ParagraphModel *> *paragraph_models_;
|
||||
BLOCK_LIST *block_list_; ///< The page layout.
|
||||
PAGE_RES *page_res_; ///< The page-level data.
|
||||
std::string input_file_; ///< Name used by training code.
|
||||
std::string output_file_; ///< Name used by debug code.
|
||||
std::string datapath_; ///< Current location of tessdata.
|
||||
std::string language_; ///< Last initialized language.
|
||||
OcrEngineMode last_oem_requested_; ///< Last ocr language mode requested.
|
||||
bool recognition_done_; ///< page_res_ contains recognition data.
|
||||
|
||||
/**
|
||||
* @defgroup ThresholderParams Thresholder Parameters
|
||||
* Parameters saved from the Thresholder. Needed to rebuild coordinates.
|
||||
*/
|
||||
/* @{ */
|
||||
int rect_left_;
|
||||
int rect_top_;
|
||||
int rect_width_;
|
||||
int rect_height_;
|
||||
int image_width_;
|
||||
int image_height_;
|
||||
/* @} */
|
||||
|
||||
private:
|
||||
// A list of image filenames gets special consideration
|
||||
bool ProcessPagesFileList(FILE *fp, std::string *buf,
|
||||
const char *retry_config, int timeout_millisec,
|
||||
TessResultRenderer *renderer,
|
||||
int tessedit_page_number);
|
||||
// TIFF supports multipage so gets special consideration.
|
||||
bool ProcessPagesMultipageTiff(const unsigned char *data, size_t size,
|
||||
const char *filename, const char *retry_config,
|
||||
int timeout_millisec,
|
||||
TessResultRenderer *renderer,
|
||||
int tessedit_page_number);
|
||||
}; // class TessBaseAPI.
|
||||
|
||||
/** Escape a char string - replace &<>"' with HTML codes. */
|
||||
std::string HOcrEscape(const char *text);
|
||||
|
||||
} // namespace tesseract
|
||||
|
||||
#endif // TESSERACT_API_BASEAPI_H_
|
|
@ -1,484 +0,0 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: capi.h
|
||||
// Description: C-API TessBaseAPI
|
||||
//
|
||||
// (C) Copyright 2012, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef API_CAPI_H_
|
||||
#define API_CAPI_H_
|
||||
|
||||
#include "export.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
# include <tesseract/baseapi.h>
|
||||
# include <tesseract/ocrclass.h>
|
||||
# include <tesseract/pageiterator.h>
|
||||
# include <tesseract/renderer.h>
|
||||
# include <tesseract/resultiterator.h>
|
||||
#endif
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Fallback boolean type and constants used by the C API declarations below.
 * All three macros sit under the single BOOL guard: they are defined together,
 * and only when BOOL has not already been defined by an earlier header. */
#ifndef BOOL
|
||||
# define BOOL int
|
||||
# define TRUE 1
|
||||
# define FALSE 0
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
typedef tesseract::TessResultRenderer TessResultRenderer;
|
||||
typedef tesseract::TessBaseAPI TessBaseAPI;
|
||||
typedef tesseract::PageIterator TessPageIterator;
|
||||
typedef tesseract::ResultIterator TessResultIterator;
|
||||
typedef tesseract::MutableIterator TessMutableIterator;
|
||||
typedef tesseract::ChoiceIterator TessChoiceIterator;
|
||||
typedef tesseract::OcrEngineMode TessOcrEngineMode;
|
||||
typedef tesseract::PageSegMode TessPageSegMode;
|
||||
typedef tesseract::PageIteratorLevel TessPageIteratorLevel;
|
||||
typedef tesseract::Orientation TessOrientation;
|
||||
typedef tesseract::ParagraphJustification TessParagraphJustification;
|
||||
typedef tesseract::WritingDirection TessWritingDirection;
|
||||
typedef tesseract::TextlineOrder TessTextlineOrder;
|
||||
typedef tesseract::PolyBlockType TessPolyBlockType;
|
||||
typedef tesseract::ETEXT_DESC ETEXT_DESC;
|
||||
#else
|
||||
/* C-only view of the API handle types: each name is declared as a struct with
 * no definition in this header, and the functions below only ever take or
 * return pointers to them. C++ builds take the other preprocessor branch,
 * where the same names are typedefs for the corresponding tesseract:: classes. */
typedef struct TessResultRenderer TessResultRenderer;
|
||||
typedef struct TessBaseAPI TessBaseAPI;
|
||||
typedef struct TessPageIterator TessPageIterator;
|
||||
typedef struct TessResultIterator TessResultIterator;
|
||||
typedef struct TessMutableIterator TessMutableIterator;
|
||||
typedef struct TessChoiceIterator TessChoiceIterator;
|
||||
typedef enum TessOcrEngineMode {
|
||||
OEM_TESSERACT_ONLY,
|
||||
OEM_LSTM_ONLY,
|
||||
OEM_TESSERACT_LSTM_COMBINED,
|
||||
OEM_DEFAULT
|
||||
} TessOcrEngineMode;
|
||||
typedef enum TessPageSegMode {
|
||||
PSM_OSD_ONLY,
|
||||
PSM_AUTO_OSD,
|
||||
PSM_AUTO_ONLY,
|
||||
PSM_AUTO,
|
||||
PSM_SINGLE_COLUMN,
|
||||
PSM_SINGLE_BLOCK_VERT_TEXT,
|
||||
PSM_SINGLE_BLOCK,
|
||||
PSM_SINGLE_LINE,
|
||||
PSM_SINGLE_WORD,
|
||||
PSM_CIRCLE_WORD,
|
||||
PSM_SINGLE_CHAR,
|
||||
PSM_SPARSE_TEXT,
|
||||
PSM_SPARSE_TEXT_OSD,
|
||||
PSM_RAW_LINE,
|
||||
PSM_COUNT
|
||||
} TessPageSegMode;
|
||||
typedef enum TessPageIteratorLevel {
|
||||
RIL_BLOCK,
|
||||
RIL_PARA,
|
||||
RIL_TEXTLINE,
|
||||
RIL_WORD,
|
||||
RIL_SYMBOL
|
||||
} TessPageIteratorLevel;
|
||||
typedef enum TessPolyBlockType {
|
||||
PT_UNKNOWN,
|
||||
PT_FLOWING_TEXT,
|
||||
PT_HEADING_TEXT,
|
||||
PT_PULLOUT_TEXT,
|
||||
PT_EQUATION,
|
||||
PT_INLINE_EQUATION,
|
||||
PT_TABLE,
|
||||
PT_VERTICAL_TEXT,
|
||||
PT_CAPTION_TEXT,
|
||||
PT_FLOWING_IMAGE,
|
||||
PT_HEADING_IMAGE,
|
||||
PT_PULLOUT_IMAGE,
|
||||
PT_HORZ_LINE,
|
||||
PT_VERT_LINE,
|
||||
PT_NOISE,
|
||||
PT_COUNT
|
||||
} TessPolyBlockType;
|
||||
typedef enum TessOrientation {
|
||||
ORIENTATION_PAGE_UP,
|
||||
ORIENTATION_PAGE_RIGHT,
|
||||
ORIENTATION_PAGE_DOWN,
|
||||
ORIENTATION_PAGE_LEFT
|
||||
} TessOrientation;
|
||||
typedef enum TessParagraphJustification {
|
||||
JUSTIFICATION_UNKNOWN,
|
||||
JUSTIFICATION_LEFT,
|
||||
JUSTIFICATION_CENTER,
|
||||
JUSTIFICATION_RIGHT
|
||||
} TessParagraphJustification;
|
||||
typedef enum TessWritingDirection {
|
||||
WRITING_DIRECTION_LEFT_TO_RIGHT,
|
||||
WRITING_DIRECTION_RIGHT_TO_LEFT,
|
||||
WRITING_DIRECTION_TOP_TO_BOTTOM
|
||||
} TessWritingDirection;
|
||||
typedef enum TessTextlineOrder {
|
||||
TEXTLINE_ORDER_LEFT_TO_RIGHT,
|
||||
TEXTLINE_ORDER_RIGHT_TO_LEFT,
|
||||
TEXTLINE_ORDER_TOP_TO_BOTTOM
|
||||
} TessTextlineOrder;
|
||||
typedef struct ETEXT_DESC ETEXT_DESC;
|
||||
#endif
|
||||
|
||||
/* Callback type accepted by TessMonitorSetCancelFunc.
 * NOTE(review): presumably cancel_this is the pointer registered with
 * TessMonitorSetCancelThis and a true return requests cancellation - confirm
 * against ocrclass.h. */
typedef bool (*TessCancelFunc)(void *cancel_this, int words);
|
||||
/* Callback type accepted by TessMonitorSetProgressFunc. It receives the
 * monitor object plus four ints named left, right, top and bottom.
 * NOTE(review): the meaning of the bool return is not visible here - confirm
 * against ocrclass.h. */
typedef bool (*TessProgressFunc)(ETEXT_DESC *ths, int left, int right, int top,
|
||||
int bottom);
|
||||
|
||||
struct Pix;
|
||||
struct Boxa;
|
||||
struct Pixa;
|
||||
|
||||
/* General free functions */
|
||||
|
||||
TESS_API const char *TessVersion();
|
||||
TESS_API void TessDeleteText(const char *text);
|
||||
TESS_API void TessDeleteTextArray(char **arr);
|
||||
TESS_API void TessDeleteIntArray(const int *arr);
|
||||
|
||||
/* Renderer API */
|
||||
TESS_API TessResultRenderer *TessTextRendererCreate(const char *outputbase);
|
||||
TESS_API TessResultRenderer *TessHOcrRendererCreate(const char *outputbase);
|
||||
TESS_API TessResultRenderer *TessHOcrRendererCreate2(const char *outputbase,
|
||||
BOOL font_info);
|
||||
TESS_API TessResultRenderer *TessAltoRendererCreate(const char *outputbase);
|
||||
TESS_API TessResultRenderer *TessTsvRendererCreate(const char *outputbase);
|
||||
TESS_API TessResultRenderer *TessPDFRendererCreate(const char *outputbase,
|
||||
const char *datadir,
|
||||
BOOL textonly);
|
||||
TESS_API TessResultRenderer *TessUnlvRendererCreate(const char *outputbase);
|
||||
TESS_API TessResultRenderer *TessBoxTextRendererCreate(const char *outputbase);
|
||||
TESS_API TessResultRenderer *TessLSTMBoxRendererCreate(const char *outputbase);
|
||||
TESS_API TessResultRenderer *TessWordStrBoxRendererCreate(
|
||||
const char *outputbase);
|
||||
|
||||
TESS_API void TessDeleteResultRenderer(TessResultRenderer *renderer);
|
||||
TESS_API void TessResultRendererInsert(TessResultRenderer *renderer,
|
||||
TessResultRenderer *next);
|
||||
TESS_API TessResultRenderer *TessResultRendererNext(
|
||||
TessResultRenderer *renderer);
|
||||
TESS_API BOOL TessResultRendererBeginDocument(TessResultRenderer *renderer,
|
||||
const char *title);
|
||||
TESS_API BOOL TessResultRendererAddImage(TessResultRenderer *renderer,
|
||||
TessBaseAPI *api);
|
||||
TESS_API BOOL TessResultRendererEndDocument(TessResultRenderer *renderer);
|
||||
|
||||
TESS_API const char *TessResultRendererExtention(TessResultRenderer *renderer);
|
||||
TESS_API const char *TessResultRendererTitle(TessResultRenderer *renderer);
|
||||
TESS_API int TessResultRendererImageNum(TessResultRenderer *renderer);
|
||||
|
||||
/* Base API */
|
||||
|
||||
TESS_API TessBaseAPI *TessBaseAPICreate();
|
||||
TESS_API void TessBaseAPIDelete(TessBaseAPI *handle);
|
||||
|
||||
TESS_API size_t TessBaseAPIGetOpenCLDevice(TessBaseAPI *handle, void **device);
|
||||
|
||||
TESS_API void TessBaseAPISetInputName(TessBaseAPI *handle, const char *name);
|
||||
TESS_API const char *TessBaseAPIGetInputName(TessBaseAPI *handle);
|
||||
|
||||
TESS_API void TessBaseAPISetInputImage(TessBaseAPI *handle, struct Pix *pix);
|
||||
TESS_API struct Pix *TessBaseAPIGetInputImage(TessBaseAPI *handle);
|
||||
|
||||
TESS_API int TessBaseAPIGetSourceYResolution(TessBaseAPI *handle);
|
||||
TESS_API const char *TessBaseAPIGetDatapath(TessBaseAPI *handle);
|
||||
|
||||
TESS_API void TessBaseAPISetOutputName(TessBaseAPI *handle, const char *name);
|
||||
|
||||
TESS_API BOOL TessBaseAPISetVariable(TessBaseAPI *handle, const char *name,
|
||||
const char *value);
|
||||
TESS_API BOOL TessBaseAPISetDebugVariable(TessBaseAPI *handle, const char *name,
|
||||
const char *value);
|
||||
|
||||
TESS_API BOOL TessBaseAPIGetIntVariable(const TessBaseAPI *handle,
|
||||
const char *name, int *value);
|
||||
TESS_API BOOL TessBaseAPIGetBoolVariable(const TessBaseAPI *handle,
|
||||
const char *name, BOOL *value);
|
||||
TESS_API BOOL TessBaseAPIGetDoubleVariable(const TessBaseAPI *handle,
|
||||
const char *name, double *value);
|
||||
TESS_API const char *TessBaseAPIGetStringVariable(const TessBaseAPI *handle,
|
||||
const char *name);
|
||||
|
||||
TESS_API void TessBaseAPIPrintVariables(const TessBaseAPI *handle, FILE *fp);
|
||||
TESS_API BOOL TessBaseAPIPrintVariablesToFile(const TessBaseAPI *handle,
|
||||
const char *filename);
|
||||
|
||||
TESS_API int TessBaseAPIInit1(TessBaseAPI *handle, const char *datapath,
|
||||
const char *language, TessOcrEngineMode oem,
|
||||
char **configs, int configs_size);
|
||||
TESS_API int TessBaseAPIInit2(TessBaseAPI *handle, const char *datapath,
|
||||
const char *language, TessOcrEngineMode oem);
|
||||
TESS_API int TessBaseAPIInit3(TessBaseAPI *handle, const char *datapath,
|
||||
const char *language);
|
||||
|
||||
TESS_API int TessBaseAPIInit4(TessBaseAPI *handle, const char *datapath,
|
||||
const char *language, TessOcrEngineMode mode,
|
||||
char **configs, int configs_size, char **vars_vec,
|
||||
char **vars_values, size_t vars_vec_size,
|
||||
BOOL set_only_non_debug_params);
|
||||
|
||||
TESS_API int TessBaseAPIInit5(TessBaseAPI *handle, const char *data, int data_size,
|
||||
const char *language, TessOcrEngineMode mode,
|
||||
char **configs, int configs_size, char **vars_vec,
|
||||
char **vars_values, size_t vars_vec_size,
|
||||
BOOL set_only_non_debug_params);
|
||||
|
||||
TESS_API const char *TessBaseAPIGetInitLanguagesAsString(
|
||||
const TessBaseAPI *handle);
|
||||
TESS_API char **TessBaseAPIGetLoadedLanguagesAsVector(
|
||||
const TessBaseAPI *handle);
|
||||
TESS_API char **TessBaseAPIGetAvailableLanguagesAsVector(
|
||||
const TessBaseAPI *handle);
|
||||
|
||||
TESS_API void TessBaseAPIInitForAnalysePage(TessBaseAPI *handle);
|
||||
|
||||
TESS_API void TessBaseAPIReadConfigFile(TessBaseAPI *handle,
|
||||
const char *filename);
|
||||
TESS_API void TessBaseAPIReadDebugConfigFile(TessBaseAPI *handle,
|
||||
const char *filename);
|
||||
|
||||
TESS_API void TessBaseAPISetPageSegMode(TessBaseAPI *handle,
|
||||
TessPageSegMode mode);
|
||||
TESS_API TessPageSegMode TessBaseAPIGetPageSegMode(const TessBaseAPI *handle);
|
||||
|
||||
TESS_API char *TessBaseAPIRect(TessBaseAPI *handle,
|
||||
const unsigned char *imagedata,
|
||||
int bytes_per_pixel, int bytes_per_line,
|
||||
int left, int top, int width, int height);
|
||||
|
||||
TESS_API void TessBaseAPIClearAdaptiveClassifier(TessBaseAPI *handle);
|
||||
|
||||
TESS_API void TessBaseAPISetImage(TessBaseAPI *handle,
|
||||
const unsigned char *imagedata, int width,
|
||||
int height, int bytes_per_pixel,
|
||||
int bytes_per_line);
|
||||
TESS_API void TessBaseAPISetImage2(TessBaseAPI *handle, struct Pix *pix);
|
||||
|
||||
TESS_API void TessBaseAPISetSourceResolution(TessBaseAPI *handle, int ppi);
|
||||
|
||||
TESS_API void TessBaseAPISetRectangle(TessBaseAPI *handle, int left, int top,
|
||||
int width, int height);
|
||||
|
||||
TESS_API struct Pix *TessBaseAPIGetThresholdedImage(TessBaseAPI *handle);
|
||||
TESS_API struct Boxa *TessBaseAPIGetRegions(TessBaseAPI *handle,
|
||||
struct Pixa **pixa);
|
||||
TESS_API struct Boxa *TessBaseAPIGetTextlines(TessBaseAPI *handle,
|
||||
struct Pixa **pixa,
|
||||
int **blockids);
|
||||
TESS_API struct Boxa *TessBaseAPIGetTextlines1(TessBaseAPI *handle,
|
||||
BOOL raw_image, int raw_padding,
|
||||
struct Pixa **pixa,
|
||||
int **blockids, int **paraids);
|
||||
TESS_API struct Boxa *TessBaseAPIGetStrips(TessBaseAPI *handle,
|
||||
struct Pixa **pixa, int **blockids);
|
||||
TESS_API struct Boxa *TessBaseAPIGetWords(TessBaseAPI *handle,
|
||||
struct Pixa **pixa);
|
||||
TESS_API struct Boxa *TessBaseAPIGetConnectedComponents(TessBaseAPI *handle,
|
||||
struct Pixa **cc);
|
||||
TESS_API struct Boxa *TessBaseAPIGetComponentImages(TessBaseAPI *handle,
|
||||
TessPageIteratorLevel level,
|
||||
BOOL text_only,
|
||||
struct Pixa **pixa,
|
||||
int **blockids);
|
||||
TESS_API struct Boxa *TessBaseAPIGetComponentImages1(
|
||||
TessBaseAPI *handle, TessPageIteratorLevel level, BOOL text_only,
|
||||
BOOL raw_image, int raw_padding, struct Pixa **pixa, int **blockids,
|
||||
int **paraids);
|
||||
|
||||
TESS_API int TessBaseAPIGetThresholdedImageScaleFactor(
|
||||
const TessBaseAPI *handle);
|
||||
|
||||
TESS_API TessPageIterator *TessBaseAPIAnalyseLayout(TessBaseAPI *handle);
|
||||
|
||||
TESS_API int TessBaseAPIRecognize(TessBaseAPI *handle, ETEXT_DESC *monitor);
|
||||
|
||||
TESS_API BOOL TessBaseAPIProcessPages(TessBaseAPI *handle, const char *filename,
|
||||
const char *retry_config,
|
||||
int timeout_millisec,
|
||||
TessResultRenderer *renderer);
|
||||
TESS_API BOOL TessBaseAPIProcessPage(TessBaseAPI *handle, struct Pix *pix,
|
||||
int page_index, const char *filename,
|
||||
const char *retry_config,
|
||||
int timeout_millisec,
|
||||
TessResultRenderer *renderer);
|
||||
|
||||
TESS_API TessResultIterator *TessBaseAPIGetIterator(TessBaseAPI *handle);
|
||||
TESS_API TessMutableIterator *TessBaseAPIGetMutableIterator(
|
||||
TessBaseAPI *handle);
|
||||
|
||||
TESS_API char *TessBaseAPIGetUTF8Text(TessBaseAPI *handle);
|
||||
TESS_API char *TessBaseAPIGetHOCRText(TessBaseAPI *handle, int page_number);
|
||||
|
||||
TESS_API char *TessBaseAPIGetAltoText(TessBaseAPI *handle, int page_number);
|
||||
TESS_API char *TessBaseAPIGetTsvText(TessBaseAPI *handle, int page_number);
|
||||
|
||||
TESS_API char *TessBaseAPIGetBoxText(TessBaseAPI *handle, int page_number);
|
||||
TESS_API char *TessBaseAPIGetLSTMBoxText(TessBaseAPI *handle, int page_number);
|
||||
TESS_API char *TessBaseAPIGetWordStrBoxText(TessBaseAPI *handle,
|
||||
int page_number);
|
||||
|
||||
TESS_API char *TessBaseAPIGetUNLVText(TessBaseAPI *handle);
|
||||
TESS_API int TessBaseAPIMeanTextConf(TessBaseAPI *handle);
|
||||
|
||||
TESS_API int *TessBaseAPIAllWordConfidences(TessBaseAPI *handle);
|
||||
|
||||
#ifndef DISABLED_LEGACY_ENGINE
|
||||
TESS_API BOOL TessBaseAPIAdaptToWordStr(TessBaseAPI *handle,
|
||||
TessPageSegMode mode,
|
||||
const char *wordstr);
|
||||
#endif // #ifndef DISABLED_LEGACY_ENGINE
|
||||
|
||||
TESS_API void TessBaseAPIClear(TessBaseAPI *handle);
|
||||
TESS_API void TessBaseAPIEnd(TessBaseAPI *handle);
|
||||
|
||||
TESS_API int TessBaseAPIIsValidWord(TessBaseAPI *handle, const char *word);
|
||||
TESS_API BOOL TessBaseAPIGetTextDirection(TessBaseAPI *handle, int *out_offset,
|
||||
float *out_slope);
|
||||
|
||||
TESS_API const char *TessBaseAPIGetUnichar(TessBaseAPI *handle, int unichar_id);
|
||||
|
||||
TESS_API void TessBaseAPIClearPersistentCache(TessBaseAPI *handle);
|
||||
|
||||
#ifndef DISABLED_LEGACY_ENGINE
|
||||
|
||||
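// Call TessDeleteText(*script_name) to free memory allocated by this
|
||||
// function.
// NOTE(review): confirm in the implementation that *script_name is really
// caller-owned before freeing it; the parameter is declared const char **.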
|
||||
TESS_API BOOL TessBaseAPIDetectOrientationScript(TessBaseAPI *handle,
|
||||
int *orient_deg,
|
||||
float *orient_conf,
|
||||
const char **script_name,
|
||||
float *script_conf);
|
||||
#endif // #ifndef DISABLED_LEGACY_ENGINE
|
||||
|
||||
TESS_API void TessBaseAPISetMinOrientationMargin(TessBaseAPI *handle,
|
||||
double margin);
|
||||
|
||||
TESS_API int TessBaseAPINumDawgs(const TessBaseAPI *handle);
|
||||
|
||||
TESS_API TessOcrEngineMode TessBaseAPIOem(const TessBaseAPI *handle);
|
||||
|
||||
TESS_API void TessBaseGetBlockTextOrientations(TessBaseAPI *handle,
|
||||
int **block_orientation,
|
||||
bool **vertical_writing);
|
||||
|
||||
/* Page iterator */
|
||||
|
||||
TESS_API void TessPageIteratorDelete(TessPageIterator *handle);
|
||||
|
||||
TESS_API TessPageIterator *TessPageIteratorCopy(const TessPageIterator *handle);
|
||||
|
||||
TESS_API void TessPageIteratorBegin(TessPageIterator *handle);
|
||||
|
||||
TESS_API BOOL TessPageIteratorNext(TessPageIterator *handle,
|
||||
TessPageIteratorLevel level);
|
||||
|
||||
TESS_API BOOL TessPageIteratorIsAtBeginningOf(const TessPageIterator *handle,
|
||||
TessPageIteratorLevel level);
|
||||
|
||||
TESS_API BOOL TessPageIteratorIsAtFinalElement(const TessPageIterator *handle,
|
||||
TessPageIteratorLevel level,
|
||||
TessPageIteratorLevel element);
|
||||
|
||||
TESS_API BOOL TessPageIteratorBoundingBox(const TessPageIterator *handle,
|
||||
TessPageIteratorLevel level,
|
||||
int *left, int *top, int *right,
|
||||
int *bottom);
|
||||
|
||||
TESS_API TessPolyBlockType
|
||||
TessPageIteratorBlockType(const TessPageIterator *handle);
|
||||
|
||||
TESS_API struct Pix *TessPageIteratorGetBinaryImage(
|
||||
const TessPageIterator *handle, TessPageIteratorLevel level);
|
||||
|
||||
TESS_API struct Pix *TessPageIteratorGetImage(const TessPageIterator *handle,
|
||||
TessPageIteratorLevel level,
|
||||
int padding,
|
||||
struct Pix *original_image,
|
||||
int *left, int *top);
|
||||
|
||||
TESS_API BOOL TessPageIteratorBaseline(const TessPageIterator *handle,
|
||||
TessPageIteratorLevel level, int *x1,
|
||||
int *y1, int *x2, int *y2);
|
||||
|
||||
TESS_API void TessPageIteratorOrientation(
|
||||
TessPageIterator *handle, TessOrientation *orientation,
|
||||
TessWritingDirection *writing_direction, TessTextlineOrder *textline_order,
|
||||
float *deskew_angle);
|
||||
|
||||
TESS_API void TessPageIteratorParagraphInfo(
|
||||
TessPageIterator *handle, TessParagraphJustification *justification,
|
||||
BOOL *is_list_item, BOOL *is_crown, int *first_line_indent);
|
||||
|
||||
/* Result iterator */
|
||||
|
||||
TESS_API void TessResultIteratorDelete(TessResultIterator *handle);
|
||||
TESS_API TessResultIterator *TessResultIteratorCopy(
|
||||
const TessResultIterator *handle);
|
||||
TESS_API TessPageIterator *TessResultIteratorGetPageIterator(
|
||||
TessResultIterator *handle);
|
||||
TESS_API const TessPageIterator *TessResultIteratorGetPageIteratorConst(
|
||||
const TessResultIterator *handle);
|
||||
TESS_API TessChoiceIterator *TessResultIteratorGetChoiceIterator(
|
||||
const TessResultIterator *handle);
|
||||
|
||||
TESS_API BOOL TessResultIteratorNext(TessResultIterator *handle,
|
||||
TessPageIteratorLevel level);
|
||||
TESS_API char *TessResultIteratorGetUTF8Text(const TessResultIterator *handle,
|
||||
TessPageIteratorLevel level);
|
||||
TESS_API float TessResultIteratorConfidence(const TessResultIterator *handle,
|
||||
TessPageIteratorLevel level);
|
||||
TESS_API const char *TessResultIteratorWordRecognitionLanguage(
|
||||
const TessResultIterator *handle);
|
||||
TESS_API const char *TessResultIteratorWordFontAttributes(
|
||||
const TessResultIterator *handle, BOOL *is_bold, BOOL *is_italic,
|
||||
BOOL *is_underlined, BOOL *is_monospace, BOOL *is_serif, BOOL *is_smallcaps,
|
||||
int *pointsize, int *font_id);
|
||||
|
||||
TESS_API BOOL
|
||||
TessResultIteratorWordIsFromDictionary(const TessResultIterator *handle);
|
||||
TESS_API BOOL TessResultIteratorWordIsNumeric(const TessResultIterator *handle);
|
||||
TESS_API BOOL
|
||||
TessResultIteratorSymbolIsSuperscript(const TessResultIterator *handle);
|
||||
TESS_API BOOL
|
||||
TessResultIteratorSymbolIsSubscript(const TessResultIterator *handle);
|
||||
TESS_API BOOL
|
||||
TessResultIteratorSymbolIsDropcap(const TessResultIterator *handle);
|
||||
|
||||
TESS_API void TessChoiceIteratorDelete(TessChoiceIterator *handle);
|
||||
TESS_API BOOL TessChoiceIteratorNext(TessChoiceIterator *handle);
|
||||
TESS_API const char *TessChoiceIteratorGetUTF8Text(
|
||||
const TessChoiceIterator *handle);
|
||||
TESS_API float TessChoiceIteratorConfidence(const TessChoiceIterator *handle);
|
||||
|
||||
/* Progress monitor */
|
||||
|
||||
TESS_API ETEXT_DESC *TessMonitorCreate();
|
||||
TESS_API void TessMonitorDelete(ETEXT_DESC *monitor);
|
||||
TESS_API void TessMonitorSetCancelFunc(ETEXT_DESC *monitor,
|
||||
TessCancelFunc cancelFunc);
|
||||
TESS_API void TessMonitorSetCancelThis(ETEXT_DESC *monitor, void *cancelThis);
|
||||
TESS_API void *TessMonitorGetCancelThis(ETEXT_DESC *monitor);
|
||||
TESS_API void TessMonitorSetProgressFunc(ETEXT_DESC *monitor,
|
||||
TessProgressFunc progressFunc);
|
||||
TESS_API int TessMonitorGetProgress(ETEXT_DESC *monitor);
|
||||
TESS_API void TessMonitorSetDeadlineMSecs(ETEXT_DESC *monitor, int deadline);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // API_CAPI_H_
|
|
@ -1,37 +0,0 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: export.h
|
||||
// Description: Place holder
|
||||
//
|
||||
// (C) Copyright 2006, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef TESSERACT_PLATFORM_H_
|
||||
#define TESSERACT_PLATFORM_H_
|
||||
|
||||
// TESS_API is the export/import decoration placed on public declarations.
// It is only computed here when the build has not already defined it:
//   - Windows / Cygwin: __declspec(dllexport) when TESS_EXPORTS is defined,
//     __declspec(dllimport) when TESS_IMPORTS is defined, otherwise empty.
//   - Everywhere else: default symbol visibility when either TESS_EXPORTS or
//     TESS_IMPORTS is defined, otherwise empty.
#ifndef TESS_API
|
||||
# if defined(_WIN32) || defined(__CYGWIN__)
|
||||
# if defined(TESS_EXPORTS)
|
||||
# define TESS_API __declspec(dllexport)
|
||||
# elif defined(TESS_IMPORTS)
|
||||
# define TESS_API __declspec(dllimport)
|
||||
# else
|
||||
# define TESS_API
|
||||
# endif
|
||||
# else
|
||||
# if defined(TESS_EXPORTS) || defined(TESS_IMPORTS)
|
||||
# define TESS_API __attribute__((visibility("default")))
|
||||
# else
|
||||
# define TESS_API
|
||||
# endif
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#endif // TESSERACT_PLATFORM_H_
|
|
@ -1,235 +0,0 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: ltrresultiterator.h
|
||||
// Description: Iterator for tesseract results in strict left-to-right
|
||||
// order that avoids using tesseract internal data structures.
|
||||
// Author: Ray Smith
|
||||
//
|
||||
// (C) Copyright 2010, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H_
|
||||
#define TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H_
|
||||
|
||||
#include "export.h" // for TESS_API
|
||||
#include "pageiterator.h" // for PageIterator
|
||||
#include "publictypes.h" // for PageIteratorLevel
|
||||
#include "unichar.h" // for StrongScriptDirection
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
class BLOB_CHOICE_IT;
|
||||
class PAGE_RES;
|
||||
class WERD_RES;
|
||||
|
||||
class Tesseract;
|
||||
|
||||
// Class to iterate over tesseract results, providing access to all levels
|
||||
// of the page hierarchy, without including any tesseract headers or having
|
||||
// to handle any tesseract structures.
|
||||
// WARNING! This class points to data held within the TessBaseAPI class, and
|
||||
// therefore can only be used while the TessBaseAPI class still exists and
|
||||
// has not been subjected to a call of Init, SetImage, Recognize, Clear, End,
|
||||
// DetectOS, or anything else that changes the internal PAGE_RES.
|
||||
// See tesseract/publictypes.h for the definition of PageIteratorLevel.
|
||||
// See also base class PageIterator, which contains the bulk of the interface.
|
||||
// LTRResultIterator adds text-specific methods for access to OCR output.
|
||||
|
||||
class TESS_API LTRResultIterator : public PageIterator {
|
||||
friend class ChoiceIterator;
|
||||
|
||||
public:
|
||||
// page_res and tesseract come directly from the BaseAPI.
|
||||
// The rectangle parameters are copied indirectly from the Thresholder,
|
||||
// via the BaseAPI. They represent the coordinates of some rectangle in an
|
||||
// original image (in top-left-origin coordinates) and therefore the top-left
|
||||
// needs to be added to any output boxes in order to specify coordinates
|
||||
// in the original image. See TessBaseAPI::SetRectangle.
|
||||
// The scale and scaled_yres are in case the Thresholder scaled the image
|
||||
// rectangle prior to thresholding. Any coordinates in tesseract's image
|
||||
// must be divided by scale before adding (rect_left, rect_top).
|
||||
// The scaled_yres indicates the effective resolution of the binary image
|
||||
// that tesseract has been given by the Thresholder.
|
||||
// After the constructor, Begin has already been called.
|
||||
LTRResultIterator(PAGE_RES *page_res, Tesseract *tesseract, int scale,
|
||||
int scaled_yres, int rect_left, int rect_top,
|
||||
int rect_width, int rect_height);
|
||||
|
||||
~LTRResultIterator() override;
|
||||
|
||||
// LTRResultIterators may be copied! This makes it possible to iterate over
|
||||
// all the objects at a lower level, while maintaining an iterator to
|
||||
// objects at a higher level. These constructors DO NOT CALL Begin, so
|
||||
// iterations will continue from the location of src.
|
||||
// TODO: For now the copy constructor and operator= only need the base class
|
||||
// versions, but if new data members are added, don't forget to add them!
|
||||
|
||||
// ============= Moving around within the page ============.
|
||||
|
||||
// See PageIterator.
|
||||
|
||||
// ============= Accessing data ==============.
|
||||
|
||||
// Returns the null terminated UTF-8 encoded text string for the current
|
||||
// object at the given level. Use delete [] to free after use.
|
||||
char *GetUTF8Text(PageIteratorLevel level) const;
|
||||
|
||||
// Set the string inserted at the end of each text line. "\n" by default.
|
||||
void SetLineSeparator(const char *new_line);
|
||||
|
||||
// Set the string inserted at the end of each paragraph. "\n" by default.
|
||||
void SetParagraphSeparator(const char *new_para);
|
||||
|
||||
// Returns the mean confidence of the current object at the given level.
|
||||
// The number should be interpreted as a percent probability. (0.0f-100.0f)
|
||||
float Confidence(PageIteratorLevel level) const;
|
||||
|
||||
// ============= Functions that refer to words only ============.
|
||||
|
||||
// Returns the font attributes of the current word. If iterating at a higher
|
||||
// level object than words, eg textlines, then this will return the
|
||||
// attributes of the first word in that textline.
|
||||
// The actual return value is a string representing a font name. It points
|
||||
// to an internal table and SHOULD NOT BE DELETED. Lifespan is the same as
|
||||
// the iterator itself, ie rendered invalid by various members of
|
||||
// TessBaseAPI, including Init, SetImage, End or deleting the TessBaseAPI.
|
||||
// Pointsize is returned in printers points (1/72 inch.)
|
||||
const char *WordFontAttributes(bool *is_bold, bool *is_italic,
|
||||
bool *is_underlined, bool *is_monospace,
|
||||
bool *is_serif, bool *is_smallcaps,
|
||||
int *pointsize, int *font_id) const;
|
||||
|
||||
// Return the name of the language used to recognize this word.
|
||||
// On error, nullptr. Do not delete this pointer.
|
||||
const char *WordRecognitionLanguage() const;
|
||||
|
||||
// Return the overall directionality of this word.
|
||||
StrongScriptDirection WordDirection() const;
|
||||
|
||||
// Returns true if the current word was found in a dictionary.
|
||||
bool WordIsFromDictionary() const;
|
||||
|
||||
// Returns the number of blanks before the current word.
|
||||
int BlanksBeforeWord() const;
|
||||
|
||||
// Returns true if the current word is numeric.
|
||||
bool WordIsNumeric() const;
|
||||
|
||||
// Returns true if the word contains blamer information.
|
||||
bool HasBlamerInfo() const;
|
||||
|
||||
// Returns the pointer to ParamsTrainingBundle stored in the BlamerBundle
|
||||
// of the current word.
|
||||
const void *GetParamsTrainingBundle() const;
|
||||
|
||||
// Returns a pointer to the string with blamer information for this word.
|
||||
// Assumes that the word's blamer_bundle is not nullptr.
|
||||
const char *GetBlamerDebug() const;
|
||||
|
||||
// Returns a pointer to the string with misadaption information for this word.
|
||||
// Assumes that the word's blamer_bundle is not nullptr.
|
||||
const char *GetBlamerMisadaptionDebug() const;
|
||||
|
||||
// Returns true if a truth string was recorded for the current word.
|
||||
bool HasTruthString() const;
|
||||
|
||||
// Returns true if the given string is equivalent to the truth string for
|
||||
// the current word.
|
||||
bool EquivalentToTruth(const char *str) const;
|
||||
|
||||
// Returns a null terminated UTF-8 encoded truth string for the current word.
|
||||
// Use delete [] to free after use.
|
||||
char *WordTruthUTF8Text() const;
|
||||
|
||||
// Returns a null terminated UTF-8 encoded normalized OCR string for the
|
||||
// current word. Use delete [] to free after use.
|
||||
char *WordNormedUTF8Text() const;
|
||||
|
||||
// Returns a pointer to serialized choice lattice.
|
||||
// Fills lattice_size with the number of bytes in lattice data.
|
||||
const char *WordLattice(int *lattice_size) const;
|
||||
|
||||
// ============= Functions that refer to symbols only ============.
|
||||
|
||||
// Returns true if the current symbol is a superscript.
|
||||
// If iterating at a higher level object than symbols, eg words, then
|
||||
// this will return the attributes of the first symbol in that word.
|
||||
bool SymbolIsSuperscript() const;
|
||||
// Returns true if the current symbol is a subscript.
|
||||
// If iterating at a higher level object than symbols, eg words, then
|
||||
// this will return the attributes of the first symbol in that word.
|
||||
bool SymbolIsSubscript() const;
|
||||
// Returns true if the current symbol is a dropcap.
|
||||
// If iterating at a higher level object than symbols, eg words, then
|
||||
// this will return the attributes of the first symbol in that word.
|
||||
bool SymbolIsDropcap() const;
|
||||
|
||||
protected:
|
||||
const char *line_separator_;
|
||||
const char *paragraph_separator_;
|
||||
};
|
||||
|
||||
// Class to iterate over the classifier choices for a single RIL_SYMBOL.
|
||||
class TESS_API ChoiceIterator {
|
||||
public:
|
||||
// Construction is from a LTRResultIterator that points to the symbol of
|
||||
// interest. The ChoiceIterator allows a one-shot iteration over the
|
||||
// choices for this symbol and after that it is useless.
|
||||
explicit ChoiceIterator(const LTRResultIterator &result_it);
|
||||
~ChoiceIterator();
|
||||
|
||||
// Moves to the next choice for the symbol and returns false if there
|
||||
// are none left.
|
||||
bool Next();
|
||||
|
||||
// ============= Accessing data ==============.
|
||||
|
||||
// Returns the null terminated UTF-8 encoded text string for the current
|
||||
// choice.
|
||||
// NOTE: Unlike LTRResultIterator::GetUTF8Text, the return points to an
|
||||
// internal structure and should NOT be delete[]ed to free after use.
|
||||
const char *GetUTF8Text() const;
|
||||
|
||||
// Returns the confidence of the current choice depending on the used language
|
||||
// data. If only LSTM traineddata is used the value range is 0.0f - 1.0f. All
|
||||
// choices for one symbol should roughly add up to 1.0f.
|
||||
// If only traineddata of the legacy engine is used, the number should be
|
||||
// interpreted as a percent probability. (0.0f-100.0f) In this case
|
||||
// probabilities won't add up to 100. Each one stands on its own.
|
||||
float Confidence() const;
|
||||
|
||||
// Returns a vector containing all timesteps, which belong to the currently
|
||||
// selected symbol. A timestep is a vector containing pairs of symbols and
|
||||
// floating point numbers. The number states the probability for the
|
||||
// corresponding symbol.
|
||||
std::vector<std::vector<std::pair<const char *, float>>> *Timesteps() const;
|
||||
|
||||
private:
|
||||
// clears the remaining spaces out of the results and adapt the probabilities
|
||||
void filterSpaces();
|
||||
// Pointer to the WERD_RES object owned by the API.
|
||||
WERD_RES *word_res_;
|
||||
// Iterator over the blob choices.
|
||||
BLOB_CHOICE_IT *choice_it_;
|
||||
std::vector<std::pair<const char *, float>> *LSTM_choices_ = nullptr;
|
||||
std::vector<std::pair<const char *, float>>::iterator LSTM_choice_it_;
|
||||
|
||||
const int *tstep_index_;
|
||||
// regulates the rating granularity
|
||||
double rating_coefficient_;
|
||||
// leading blanks
|
||||
int blanks_before_word_;
|
||||
// true when there is lstm engine related trained data
|
||||
bool oemLSTM_;
|
||||
};
|
||||
|
||||
} // namespace tesseract.
|
||||
|
||||
#endif // TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H_
|
|
@ -1,158 +0,0 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
/**********************************************************************
|
||||
* File: ocrclass.h
|
||||
* Description: Class definitions and constants for the OCR API.
|
||||
* Author: Hewlett-Packard Co
|
||||
*
|
||||
* (C) Copyright 1996, Hewlett-Packard Co.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
/**********************************************************************
|
||||
* This file contains typedefs for all the structures used by
|
||||
* the HP OCR interface.
|
||||
* The structures are designed to allow them to be used with any
|
||||
* structure alignment up to 8.
|
||||
**********************************************************************/
|
||||
|
||||
#ifndef CCUTIL_OCRCLASS_H_
|
||||
#define CCUTIL_OCRCLASS_H_
|
||||
|
||||
#include <chrono>
|
||||
#include <ctime>
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
/**********************************************************************
|
||||
* EANYCODE_CHAR
|
||||
* Description of a single character. The character code is defined by
|
||||
* the character set of the current font.
|
||||
* Output text is sent as an array of these structures.
|
||||
* Spaces and line endings in the output are represented in the
|
||||
* structures of the surrounding characters. They are not directly
|
||||
* represented as characters.
|
||||
* The first character in a word has a positive value of blanks.
|
||||
* Missing information should be set to the defaults in the comments.
|
||||
* If word bounds are known, but not character bounds, then the top and
|
||||
* bottom of each character should be those of the word. The left of the
|
||||
* first and right of the last char in each word should be set. All other
|
||||
* lefts and rights should be set to -1.
|
||||
* If set, the values of right and bottom are left+width and top+height.
|
||||
* Most of the members come directly from the parameters to ocr_append_char.
|
||||
* The formatting member uses the enhancement parameter and combines the
|
||||
* line direction stuff into the top 3 bits.
|
||||
* The coding is 0=RL char, 1=LR char, 2=DR NL, 3=UL NL, 4=DR Para,
|
||||
* 5=UL Para, 6=TB char, 7=BT char. API users do not need to know what
|
||||
* the coding is, only that it is backwards compatible with the previous
|
||||
* version.
|
||||
**********************************************************************/
|
||||
|
||||
struct EANYCODE_CHAR { // a single character
  // From version 2.0 onwards char_code is UTF-8. An ASCII character arrives
  // as one structure, whereas any other character is returned as two or more
  // instances of this structure, each carrying a single byte of the UTF-8
  // code and all sharing the same bounding box. Programs that want to handle
  // languages with different character sets need to handle these extended
  // characters appropriately, but *all* code needs to be prepared to receive
  // UTF-8 coded characters for things such as bullets and fancy quotes.
  // The value in parentheses after each field is its default.
  uint16_t char_code; // character itself
  int16_t left;       // left of char (-1)
  int16_t right;      // right of char (-1)
  int16_t top;        // top of char (-1)
  int16_t bottom;     // bottom of char (-1)
  int16_t font_index; // which font (0)
  uint8_t confidence; // 0=perfect, 100=reject (0/100)
  uint8_t point_size; // of char, 72 = 1 inch (10)
  int8_t blanks;      // number of spaces before this char (1)
  uint8_t formatting; // char formatting (0)
};

/**********************************************************************
 * ETEXT_DESC
 * Description of the output of the OCR engine.
 * This structure is used as both a progress monitor and the final
 * output header, since it needs to be a valid progress monitor while
 * the OCR engine is storing its output to shared memory.
 * During progress, all the buffer info is -1.
 * Progress starts at 0 and increases to 100 during OCR. No other constraint.
 * Additionally the progress callback contains the bounding box of the word
 * that is currently being processed.
 * Every progress callback, the OCR engine must set ocr_alive to 1.
 * The HP side will set ocr_alive to 0. Repeated failure to reset
 * to 1 indicates that the OCR engine is dead.
 * If the cancel function is not null then it is called with the number of
 * user words found. If it returns true then operation is cancelled.
 **********************************************************************/
class ETEXT_DESC;

using CANCEL_FUNC = bool (*)(void *, int);
using PROGRESS_FUNC = bool (*)(int, int, int, int, int);
using PROGRESS_FUNC2 = bool (*)(ETEXT_DESC *, int, int, int, int);

class ETEXT_DESC { // output header
public:
  int16_t count{0};    ///< chars in this buffer (0)
  int16_t progress{0}; ///< percent complete, increasing (0-100)
  /** The progress monitor covers word recognition; it does not cover layout
   * analysis.
   * See Ray comment in https://github.com/tesseract-ocr/tesseract/pull/27 */
  int8_t more_to_come{0};       ///< true if not last
  volatile int8_t ocr_alive{0}; ///< ocr sets to 1, HP 0
  int8_t err_code{0};           ///< for errcode use
  CANCEL_FUNC cancel{nullptr};  ///< returns true to cancel
  PROGRESS_FUNC progress_callback{nullptr}; ///< called whenever progress increases
  PROGRESS_FUNC2 progress_callback2;        ///< monitor-aware progress callback
  void *cancel_this{nullptr};               ///< this or other data for cancel
  /// Time to stop. Expected to be set only by a call to
  /// set_deadline_msecs(). The epoch value means "no deadline".
  std::chrono::steady_clock::time_point end_time;
  EANYCODE_CHAR text[1]{}; ///< character data

  /// Installs the default monitor-aware callback (which forwards to
  /// progress_callback) and starts with no deadline (end_time at the epoch).
  ETEXT_DESC() : progress_callback2(&default_progress_func), end_time() {}

  /// Sets the end time to be deadline_msecs milliseconds from now.
  /// A non-positive value leaves the current end time untouched.
  void set_deadline_msecs(int32_t deadline_msecs) {
    if (deadline_msecs <= 0) {
      return;
    }
    const std::chrono::milliseconds timeout(deadline_msecs);
    end_time = std::chrono::steady_clock::now() + timeout;
  }

  /// Returns false if we've not passed the end_time, or have not set a
  /// deadline.
  bool deadline_exceeded() const {
    const bool deadline_set = end_time.time_since_epoch() !=
                              std::chrono::steady_clock::duration::zero();
    return deadline_set && std::chrono::steady_clock::now() > end_time;
  }

private:
  /// Default for progress_callback2: relays the monitor's current progress
  /// and the given box to progress_callback when one is installed, and
  /// returns true when none is.
  static bool default_progress_func(ETEXT_DESC *ths, int left, int right,
                                    int top, int bottom) {
    const PROGRESS_FUNC plain_callback = ths->progress_callback;
    return plain_callback == nullptr ||
           (*plain_callback)(ths->progress, left, right, top, bottom);
  }
};
|
||||
|
||||
} // namespace tesseract
|
||||
|
||||
#endif // CCUTIL_OCRCLASS_H_
|
|
@ -1,139 +0,0 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: osdetect.h
|
||||
// Description: Orientation and script detection.
|
||||
// Author: Samuel Charron
|
||||
// Ranjith Unnikrishnan
|
||||
//
|
||||
// (C) Copyright 2008, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef TESSERACT_CCMAIN_OSDETECT_H_
|
||||
#define TESSERACT_CCMAIN_OSDETECT_H_
|
||||
|
||||
#include "export.h" // for TESS_API
|
||||
|
||||
#include <vector> // for std::vector
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
class BLOBNBOX;
|
||||
class BLOBNBOX_CLIST;
|
||||
class BLOB_CHOICE_LIST;
|
||||
class TO_BLOCK_LIST;
|
||||
class UNICHARSET;
|
||||
|
||||
class Tesseract;
|
||||
|
||||
// Upper bound on the number of scripts:
// ICU scripts (116) + "NULL" (1) + Japanese and Korean (2) + Fraktur (1).
constexpr int kMaxNumberOfScripts = 116 + 1 + 2 + 1;
|
||||
|
||||
// Plain record of a chosen orientation id and script id together with a
// confidence value for each.
struct OSBestResult {
  // Starts with both ids at 0 and both confidences at zero.
  OSBestResult()
      : orientation_id{0},
        script_id{0},
        sconfidence{0.0f},
        oconfidence{0.0f} {}

  int orientation_id;
  int script_id;
  float sconfidence; // confidence paired with script_id
  float oconfidence; // confidence paired with orientation_id
};
|
||||
|
||||
struct OSResults {
|
||||
OSResults() : unicharset(nullptr) {
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
for (int j = 0; j < kMaxNumberOfScripts; ++j) {
|
||||
scripts_na[i][j] = 0;
|
||||
}
|
||||
orientations[i] = 0;
|
||||
}
|
||||
}
|
||||
void update_best_orientation();
|
||||
// Set the estimate of the orientation to the given id.
|
||||
void set_best_orientation(int orientation_id);
|
||||
// Update/Compute the best estimate of the script assuming the given
|
||||
// orientation id.
|
||||
void update_best_script(int orientation_id);
|
||||
// Return the index of the script with the highest score for this orientation.
|
||||
TESS_API int get_best_script(int orientation_id) const;
|
||||
// Accumulate scores with given OSResults instance and update the best script.
|
||||
void accumulate(const OSResults &osr);
|
||||
|
||||
// Print statistics.
|
||||
void print_scores(void) const;
|
||||
void print_scores(int orientation_id) const;
|
||||
|
||||
// Array holding scores for each orientation id [0,3].
|
||||
// Orientation ids [0..3] map to [0, 270, 180, 90] degree orientations of the
|
||||
// page respectively, where the values refer to the amount of clockwise
|
||||
// rotation to be applied to the page for the text to be upright and readable.
|
||||
float orientations[4];
|
||||
// Script confidence scores for each of 4 possible orientations.
|
||||
float scripts_na[4][kMaxNumberOfScripts];
|
||||
|
||||
UNICHARSET *unicharset;
|
||||
OSBestResult best_result;
|
||||
};
|
||||
|
||||
// Orientation-detection helper used by the os_detect* entry points declared
// later in this header. Only the declaration is visible here; the behavior
// notes below are assumptions to be confirmed against the implementation.
class OrientationDetector {
|
||||
public:
|
||||
// Takes a list of allowed script ids and the OSResults to work with.
// NOTE(review): presumably both pointers are stored (osr_, allowed_scripts_)
// rather than copied, so they must outlive this object -- confirm.
OrientationDetector(const std::vector<int> *allowed_scripts,
|
||||
OSResults *results);
|
||||
// Processes the classifier choices of one blob.
// NOTE(review): the meaning of the bool result is not visible here -- confirm.
bool detect_blob(BLOB_CHOICE_LIST *scores);
|
||||
// NOTE(review): presumably returns an orientation id in [0,3] as described on
// OSResults::orientations -- confirm.
int get_orientation();
|
||||
|
||||
private:
|
||||
// Results object handed to the constructor.
OSResults *osr_;
|
||||
// Script ids handed to the constructor.
const std::vector<int> *allowed_scripts_;
|
||||
};
|
||||
|
||||
// Script-detection helper used by the os_detect* entry points declared later
// in this header. Only the declaration is visible here; the behavior notes
// below are assumptions to be confirmed against the implementation.
class ScriptDetector {
|
||||
public:
|
||||
// Takes a list of allowed script ids, the OSResults to work with and the
// Tesseract instance to use.
// NOTE(review): presumably the pointers are stored (allowed_scripts_, osr_,
// tess_) rather than copied, so they must outlive this object -- confirm.
ScriptDetector(const std::vector<int> *allowed_scripts, OSResults *osr,
|
||||
tesseract::Tesseract *tess);
|
||||
// Processes the classifier choices of one blob.
void detect_blob(BLOB_CHOICE_LIST *scores);
|
||||
// NOTE(review): presumably reports whether script detection can stop early
// for the given orientation id -- confirm.
bool must_stop(int orientation) const;
|
||||
|
||||
private:
|
||||
// Results object handed to the constructor.
OSResults *osr_;
|
||||
// Script name strings; their values are defined outside this header.
static const char *korean_script_;
|
||||
static const char *japanese_script_;
|
||||
static const char *fraktur_script_;
|
||||
// Script ids, one per script named by the member.
// NOTE(review): presumably looked up in the unicharset -- confirm.
int korean_id_;
|
||||
int japanese_id_;
|
||||
int katakana_id_;
|
||||
int hiragana_id_;
|
||||
int han_id_;
|
||||
int hangul_id_;
|
||||
int latin_id_;
|
||||
int fraktur_id_;
|
||||
// Tesseract instance handed to the constructor.
tesseract::Tesseract *tess_;
|
||||
// Script ids handed to the constructor.
const std::vector<int> *allowed_scripts_;
|
||||
};
|
||||
|
||||
int orientation_and_script_detection(const char *filename, OSResults *,
|
||||
tesseract::Tesseract *);
|
||||
|
||||
int os_detect(TO_BLOCK_LIST *port_blocks, OSResults *osr,
|
||||
tesseract::Tesseract *tess);
|
||||
|
||||
int os_detect_blobs(const std::vector<int> *allowed_scripts,
|
||||
BLOBNBOX_CLIST *blob_list, OSResults *osr,
|
||||
tesseract::Tesseract *tess);
|
||||
|
||||
bool os_detect_blob(BLOBNBOX *bbox, OrientationDetector *o, ScriptDetector *s,
|
||||
OSResults *, tesseract::Tesseract *tess);
|
||||
|
||||
// Helper method to convert an orientation index to its value in degrees.
|
||||
// The value represents the amount of clockwise rotation in degrees that must be
|
||||
// applied for the text to be upright (readable).
|
||||
TESS_API int OrientationIdToValue(const int &id);
|
||||
|
||||
} // namespace tesseract
|
||||
|
||||
#endif // TESSERACT_CCMAIN_OSDETECT_H_
|
|
@ -1,364 +0,0 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: pageiterator.h
|
||||
// Description: Iterator for tesseract page structure that avoids using
|
||||
// tesseract internal data structures.
|
||||
// Author: Ray Smith
|
||||
//
|
||||
// (C) Copyright 2010, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef TESSERACT_CCMAIN_PAGEITERATOR_H_
|
||||
#define TESSERACT_CCMAIN_PAGEITERATOR_H_
|
||||
|
||||
#include "export.h"
|
||||
#include "publictypes.h"
|
||||
|
||||
struct Pix;
|
||||
struct Pta;
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
struct BlamerBundle;
|
||||
class C_BLOB_IT;
|
||||
class PAGE_RES;
|
||||
class PAGE_RES_IT;
|
||||
class WERD;
|
||||
|
||||
class Tesseract;
|
||||
|
||||
/**
|
||||
* Class to iterate over tesseract page structure, providing access to all
|
||||
* levels of the page hierarchy, without including any tesseract headers or
|
||||
* having to handle any tesseract structures.
|
||||
* WARNING! This class points to data held within the TessBaseAPI class, and
|
||||
* therefore can only be used while the TessBaseAPI class still exists and
|
||||
* has not been subjected to a call of Init, SetImage, Recognize, Clear, End,
|
||||
* DetectOS, or anything else that changes the internal PAGE_RES.
|
||||
* See tesseract/publictypes.h for the definition of PageIteratorLevel.
|
||||
* See also ResultIterator, derived from PageIterator, which adds in the
|
||||
* ability to access OCR output with text-specific methods.
|
||||
*/
|
||||
|
||||
class TESS_API PageIterator {
|
||||
public:
|
||||
/**
|
||||
* page_res and tesseract come directly from the BaseAPI.
|
||||
* The rectangle parameters are copied indirectly from the Thresholder,
|
||||
* via the BaseAPI. They represent the coordinates of some rectangle in an
|
||||
* original image (in top-left-origin coordinates) and therefore the top-left
|
||||
* needs to be added to any output boxes in order to specify coordinates
|
||||
* in the original image. See TessBaseAPI::SetRectangle.
|
||||
* The scale and scaled_yres are in case the Thresholder scaled the image
|
||||
* rectangle prior to thresholding. Any coordinates in tesseract's image
|
||||
* must be divided by scale before adding (rect_left, rect_top).
|
||||
* The scaled_yres indicates the effective resolution of the binary image
|
||||
* that tesseract has been given by the Thresholder.
|
||||
* After the constructor, Begin has already been called.
|
||||
*/
|
||||
PageIterator(PAGE_RES *page_res, Tesseract *tesseract, int scale,
|
||||
int scaled_yres, int rect_left, int rect_top, int rect_width,
|
||||
int rect_height);
|
||||
virtual ~PageIterator();
|
||||
|
||||
/**
|
||||
* Page/ResultIterators may be copied! This makes it possible to iterate over
|
||||
* all the objects at a lower level, while maintaining an iterator to
|
||||
* objects at a higher level. These constructors DO NOT CALL Begin, so
|
||||
* iterations will continue from the location of src.
|
||||
*/
|
||||
PageIterator(const PageIterator &src);
|
||||
const PageIterator &operator=(const PageIterator &src);
|
||||
|
||||
/** Are we positioned at the same location as other? */
|
||||
bool PositionedAtSameWord(const PAGE_RES_IT *other) const;
|
||||
|
||||
// ============= Moving around within the page ============.
|
||||
|
||||
/**
|
||||
* Moves the iterator to point to the start of the page to begin an
|
||||
* iteration.
|
||||
*/
|
||||
virtual void Begin();
|
||||
|
||||
/**
|
||||
* Moves the iterator to the beginning of the paragraph.
|
||||
* This class implements this functionality by moving it to the zero indexed
|
||||
* blob of the first (leftmost) word on the first row of the paragraph.
|
||||
*/
|
||||
virtual void RestartParagraph();
|
||||
|
||||
/**
|
||||
* Return whether this iterator points anywhere in the first textline of a
|
||||
* paragraph.
|
||||
*/
|
||||
bool IsWithinFirstTextlineOfParagraph() const;
|
||||
|
||||
/**
|
||||
* Moves the iterator to the beginning of the text line.
|
||||
* This class implements this functionality by moving it to the zero indexed
|
||||
* blob of the first (leftmost) word of the row.
|
||||
*/
|
||||
virtual void RestartRow();
|
||||
|
||||
/**
|
||||
* Moves to the start of the next object at the given level in the
|
||||
* page hierarchy, and returns false if the end of the page was reached.
|
||||
* NOTE that RIL_SYMBOL will skip non-text blocks, but all other
|
||||
* PageIteratorLevel level values will visit each non-text block once.
|
||||
* Think of non text blocks as containing a single para, with a single line,
|
||||
* with a single imaginary word.
|
||||
* Calls to Next with different levels may be freely intermixed.
|
||||
* This function iterates words in right-to-left scripts correctly, if
|
||||
* the appropriate language has been loaded into Tesseract.
|
||||
*/
|
||||
virtual bool Next(PageIteratorLevel level);
|
||||
|
||||
/**
|
||||
* Returns true if the iterator is at the start of an object at the given
|
||||
* level.
|
||||
*
|
||||
* For instance, suppose an iterator it is pointed to the first symbol of the
|
||||
* first word of the third line of the second paragraph of the first block in
|
||||
* a page, then:
|
||||
* it.IsAtBeginningOf(RIL_BLOCK) = false
|
||||
* it.IsAtBeginningOf(RIL_PARA) = false
|
||||
* it.IsAtBeginningOf(RIL_TEXTLINE) = true
|
||||
* it.IsAtBeginningOf(RIL_WORD) = true
|
||||
* it.IsAtBeginningOf(RIL_SYMBOL) = true
|
||||
*/
|
||||
virtual bool IsAtBeginningOf(PageIteratorLevel level) const;
|
||||
|
||||
/**
|
||||
* Returns whether the iterator is positioned at the last element in a
|
||||
* given level. (e.g. the last word in a line, the last line in a block)
|
||||
*
|
||||
* Here's some two-paragraph example
|
||||
* text. It starts off innocuously
|
||||
* enough but quickly turns bizarre.
|
||||
* The author inserts a cornucopia
|
||||
* of words to guard against confused
|
||||
* references.
|
||||
*
|
||||
* Now take an iterator it pointed to the start of "bizarre."
|
||||
* it.IsAtFinalElement(RIL_PARA, RIL_SYMBOL) = false
|
||||
* it.IsAtFinalElement(RIL_PARA, RIL_WORD) = true
|
||||
* it.IsAtFinalElement(RIL_BLOCK, RIL_WORD) = false
|
||||
*/
|
||||
virtual bool IsAtFinalElement(PageIteratorLevel level,
|
||||
PageIteratorLevel element) const;
|
||||
|
||||
/**
|
||||
* Returns whether this iterator is positioned
|
||||
* before other: -1
|
||||
* equal to other: 0
|
||||
* after other: 1
|
||||
*/
|
||||
int Cmp(const PageIterator &other) const;
|
||||
|
||||
// ============= Accessing data ==============.
|
||||
// Coordinate system:
|
||||
// Integer coordinates are at the cracks between the pixels.
|
||||
// The top-left corner of the top-left pixel in the image is at (0,0).
|
||||
// The bottom-right corner of the bottom-right pixel in the image is at
|
||||
// (width, height).
|
||||
// Every bounding box goes from the top-left of the top-left contained
|
||||
// pixel to the bottom-right of the bottom-right contained pixel, so
|
||||
// the bounding box of the single top-left pixel in the image is:
|
||||
// (0,0)->(1,1).
|
||||
// If an image rectangle has been set in the API, then returned coordinates
|
||||
// relate to the original (full) image, rather than the rectangle.
|
||||
|
||||
/**
|
||||
* Controls what to include in a bounding box. Bounding boxes of all levels
|
||||
* between RIL_WORD and RIL_BLOCK can include or exclude potential diacritics.
|
||||
* Between layout analysis and recognition, it isn't known where all
|
||||
* diacritics belong, so this control is used to include or exclude some
|
||||
* diacritics that are above or below the main body of the word. In most cases
|
||||
* where the placement is obvious, and after recognition, it doesn't make as
|
||||
* much difference, as the diacritics will already be included in the word.
|
||||
*/
|
||||
void SetBoundingBoxComponents(bool include_upper_dots,
|
||||
bool include_lower_dots) {
|
||||
include_upper_dots_ = include_upper_dots;
|
||||
include_lower_dots_ = include_lower_dots;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the bounding rectangle of the current object at the given level.
|
||||
* See comment on coordinate system above.
|
||||
* Returns false if there is no such object at the current position.
|
||||
* The returned bounding box is guaranteed to match the size and position
|
||||
* of the image returned by GetBinaryImage, but may clip foreground pixels
|
||||
* from a grey image. The padding argument to GetImage can be used to expand
|
||||
* the image to include more foreground pixels. See GetImage below.
|
||||
*/
|
||||
bool BoundingBox(PageIteratorLevel level, int *left, int *top, int *right,
|
||||
int *bottom) const;
|
||||
bool BoundingBox(PageIteratorLevel level, int padding, int *left, int *top,
|
||||
int *right, int *bottom) const;
|
||||
/**
|
||||
* Returns the bounding rectangle of the object in a coordinate system of the
|
||||
* working image rectangle having its origin at (rect_left_, rect_top_) with
|
||||
* respect to the original image and is scaled by a factor scale_.
|
||||
*/
|
||||
bool BoundingBoxInternal(PageIteratorLevel level, int *left, int *top,
|
||||
int *right, int *bottom) const;
|
||||
|
||||
/** Returns whether there is no object of a given level. */
|
||||
bool Empty(PageIteratorLevel level) const;
|
||||
|
||||
/**
|
||||
* Returns the type of the current block.
|
||||
* See tesseract/publictypes.h for PolyBlockType.
|
||||
*/
|
||||
PolyBlockType BlockType() const;
|
||||
|
||||
/**
|
||||
* Returns the polygon outline of the current block. The returned Pta must
|
||||
* be ptaDestroy-ed after use. Note that the returned Pta lists the vertices
|
||||
* of the polygon, and the last edge is the line segment between the last
|
||||
* point and the first point. nullptr will be returned if the iterator is
|
||||
* at the end of the document or layout analysis was not used.
|
||||
*/
|
||||
Pta *BlockPolygon() const;
|
||||
|
||||
/**
|
||||
* Returns a binary image of the current object at the given level.
|
||||
* The position and size match the return from BoundingBoxInternal, and so
|
||||
* this could be upscaled with respect to the original input image.
|
||||
* Use pixDestroy to delete the image after use.
|
||||
*/
|
||||
Pix *GetBinaryImage(PageIteratorLevel level) const;
|
||||
|
||||
/**
|
||||
* Returns an image of the current object at the given level in greyscale
|
||||
* if available in the input. To guarantee a binary image use BinaryImage.
|
||||
* NOTE that in order to give the best possible image, the bounds are
|
||||
* expanded slightly over the binary connected component, by the supplied
|
||||
* padding, so the top-left position of the returned image is returned
|
||||
* in (left,top). These will most likely not match the coordinates
|
||||
* returned by BoundingBox.
|
||||
* If you do not supply an original image, you will get a binary one.
|
||||
* Use pixDestroy to delete the image after use.
|
||||
*/
|
||||
Pix *GetImage(PageIteratorLevel level, int padding, Pix *original_img,
|
||||
int *left, int *top) const;
|
||||
|
||||
/**
|
||||
* Returns the baseline of the current object at the given level.
|
||||
* The baseline is the line that passes through (x1, y1) and (x2, y2).
|
||||
* WARNING: with vertical text, baselines may be vertical!
|
||||
* Returns false if there is no baseline at the current position.
|
||||
*/
|
||||
bool Baseline(PageIteratorLevel level, int *x1, int *y1, int *x2,
|
||||
int *y2) const;
|
||||
|
||||
// Returns the attributes of the current row.
|
||||
void RowAttributes(float *row_height, float *descenders,
|
||||
float *ascenders) const;
|
||||
|
||||
/**
|
||||
* Returns orientation for the block the iterator points to.
|
||||
* orientation, writing_direction, textline_order: see publictypes.h
|
||||
* deskew_angle: after rotating the block so the text orientation is
|
||||
* upright, how many radians does one have to rotate the
|
||||
* block anti-clockwise for it to be level?
|
||||
* -Pi/4 <= deskew_angle <= Pi/4
|
||||
*/
|
||||
void Orientation(tesseract::Orientation *orientation,
|
||||
tesseract::WritingDirection *writing_direction,
|
||||
tesseract::TextlineOrder *textline_order,
|
||||
float *deskew_angle) const;
|
||||
|
||||
/**
|
||||
* Returns information about the current paragraph, if available.
|
||||
*
|
||||
* justification -
|
||||
* LEFT if ragged right, or fully justified and script is left-to-right.
|
||||
* RIGHT if ragged left, or fully justified and script is right-to-left.
|
||||
* unknown if it looks like source code or we have very few lines.
|
||||
* is_list_item -
|
||||
* true if we believe this is a member of an ordered or unordered list.
|
||||
* is_crown -
|
||||
* true if the first line of the paragraph is aligned with the other
|
||||
* lines of the paragraph even though subsequent paragraphs have first
|
||||
* line indents. This typically indicates that this is the continuation
|
||||
* of a previous paragraph or that it is the very first paragraph in
|
||||
* the chapter.
|
||||
* first_line_indent -
|
||||
* For LEFT aligned paragraphs, the first text line of paragraphs of
|
||||
* this kind are indented this many pixels from the left edge of the
|
||||
* rest of the paragraph.
|
||||
* for RIGHT aligned paragraphs, the first text line of paragraphs of
|
||||
* this kind are indented this many pixels from the right edge of the
|
||||
* rest of the paragraph.
|
||||
* NOTE 1: This value may be negative.
|
||||
* NOTE 2: if *is_crown == true, the first line of this paragraph is
|
||||
* actually flush, and first_line_indent is set to the "common"
|
||||
* first_line_indent for subsequent paragraphs in this block
|
||||
* of text.
|
||||
*/
|
||||
void ParagraphInfo(tesseract::ParagraphJustification *justification,
|
||||
bool *is_list_item, bool *is_crown,
|
||||
int *first_line_indent) const;
|
||||
|
||||
// If the current WERD_RES (it_->word()) is not nullptr, sets the BlamerBundle
|
||||
// of the current word to the given pointer (takes ownership of the pointer)
|
||||
// and returns true.
|
||||
// Can only be used when iterating on the word level.
|
||||
bool SetWordBlamerBundle(BlamerBundle *blamer_bundle);
|
||||
|
||||
protected:
|
||||
/**
|
||||
* Sets up the internal data for iterating the blobs of a new word, then
|
||||
* moves the iterator to the given offset.
|
||||
*/
|
||||
void BeginWord(int offset);
|
||||
|
||||
/** Pointer to the page_res owned by the API. */
|
||||
PAGE_RES *page_res_;
|
||||
/** Pointer to the Tesseract object owned by the API. */
|
||||
Tesseract *tesseract_;
|
||||
/**
|
||||
* The iterator to the page_res_. Owned by this ResultIterator.
|
||||
* A pointer just to avoid dragging in Tesseract includes.
|
||||
*/
|
||||
PAGE_RES_IT *it_;
|
||||
/**
|
||||
* The current input WERD being iterated. If there is an output from OCR,
|
||||
* then word_ is nullptr. Owned by the API
|
||||
*/
|
||||
WERD *word_;
|
||||
/** The length of the current word_. */
|
||||
int word_length_;
|
||||
/** The current blob index within the word. */
|
||||
int blob_index_;
|
||||
/**
|
||||
* Iterator to the blobs within the word. If nullptr, then we are iterating
|
||||
* OCR results in the box_word.
|
||||
* Owned by this ResultIterator.
|
||||
*/
|
||||
C_BLOB_IT *cblob_it_;
|
||||
/** Control over what to include in bounding boxes. */
|
||||
bool include_upper_dots_;
|
||||
bool include_lower_dots_;
|
||||
/** Parameters saved from the Thresholder. Needed to rebuild coordinates.*/
|
||||
int scale_;
|
||||
int scaled_yres_;
|
||||
int rect_left_;
|
||||
int rect_top_;
|
||||
int rect_width_;
|
||||
int rect_height_;
|
||||
};
|
||||
|
||||
} // namespace tesseract.
|
||||
|
||||
#endif // TESSERACT_CCMAIN_PAGEITERATOR_H_
|
|
@ -1,281 +0,0 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: publictypes.h
|
||||
// Description: Types used in both the API and internally
|
||||
// Author: Ray Smith
|
||||
//
|
||||
// (C) Copyright 2010, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef TESSERACT_CCSTRUCT_PUBLICTYPES_H_
|
||||
#define TESSERACT_CCSTRUCT_PUBLICTYPES_H_
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// This file contains types that are used both by the API and internally
|
||||
// to Tesseract. In order to decouple the API from Tesseract and prevent cyclic
|
||||
// dependencies, THIS FILE SHOULD NOT DEPEND ON ANY OTHER PART OF TESSERACT.
|
||||
// Restated: It is OK for low-level Tesseract files to include publictypes.h,
|
||||
// but not for the low-level tesseract code to include top-level API code.
|
||||
// This file should not use other Tesseract types, as that would drag
|
||||
// their includes into the API-level.
|
||||
|
||||
/** Number of printers' points in an inch. The unit of the pointsize return. */
constexpr int kPointsPerInch = 72;

/**
 * Minimum believable resolution. Used as a default if there is no other
 * information, as it is safer to under-estimate than over-estimate.
 */
constexpr int kMinCredibleResolution = 70;

/** Maximum believable resolution. */
constexpr int kMaxCredibleResolution = 2400;

/**
 * Ratio between median blob size and likely resolution. Used to estimate
 * resolution when none is provided. This is basically 1/usual text size in
 * inches.
 */
constexpr int kResolutionEstimationFactor = 10;

// The credible range must be non-empty for the two bounds to be usable
// together; fail the build rather than misbehave if someone edits them.
static_assert(kMinCredibleResolution < kMaxCredibleResolution,
              "credible resolution range must be non-empty");
|
||||
|
||||
/**
 * Possible types for a POLY_BLOCK or ColPartition.
 * Must be kept in sync with kPBColors in polyblk.cpp and PTIs*Type functions
 * below, as well as kPolyBlockNames in layout_test.cc.
 * Used extensively by ColPartition, and POLY_BLOCK.
 */
enum PolyBlockType {
  PT_UNKNOWN,         // Type is not yet known. Keep as the first element.
  PT_FLOWING_TEXT,    // Text that lives inside a column.
  PT_HEADING_TEXT,    // Text that spans more than one column.
  PT_PULLOUT_TEXT,    // Text that is in a cross-column pull-out region.
  PT_EQUATION,        // Partition belonging to an equation region.
  PT_INLINE_EQUATION, // Partition has inline equation.
  PT_TABLE,           // Partition belonging to a table region.
  PT_VERTICAL_TEXT,   // Text-line runs vertically.
  PT_CAPTION_TEXT,    // Text that belongs to an image.
  PT_FLOWING_IMAGE,   // Image that lives inside a column.
  PT_HEADING_IMAGE,   // Image that spans more than one column.
  PT_PULLOUT_IMAGE,   // Image that is in a cross-column pull-out region.
  PT_HORZ_LINE,       // Horizontal Line.
  PT_VERT_LINE,       // Vertical Line.
  PT_NOISE,           // Lies outside of any column.
  PT_COUNT            // Number of types above. Keep as the last element.
};

/** Returns true if PolyBlockType is a line type (horizontal or vertical). */
constexpr bool PTIsLineType(PolyBlockType type) {
  return type == PT_HORZ_LINE || type == PT_VERT_LINE;
}

/** Returns true if PolyBlockType is of image type (flowing, heading, pullout). */
constexpr bool PTIsImageType(PolyBlockType type) {
  return type == PT_FLOWING_IMAGE || type == PT_HEADING_IMAGE ||
         type == PT_PULLOUT_IMAGE;
}

/**
 * Returns true if PolyBlockType is of text type. Note that PT_TABLE and
 * PT_INLINE_EQUATION count as text here, while PT_EQUATION does not.
 */
constexpr bool PTIsTextType(PolyBlockType type) {
  return type == PT_FLOWING_TEXT || type == PT_HEADING_TEXT ||
         type == PT_PULLOUT_TEXT || type == PT_TABLE ||
         type == PT_VERTICAL_TEXT || type == PT_CAPTION_TEXT ||
         type == PT_INLINE_EQUATION;
}

/** Returns true if PolyBlockType is of pullout (inter-column) type. */
constexpr bool PTIsPulloutType(PolyBlockType type) {
  return type == PT_PULLOUT_IMAGE || type == PT_PULLOUT_TEXT;
}
|
||||
|
||||
/**
 * +------------------+  Orientation Example:
 * | 1 Aaaa Aaaa Aaaa |  ====================
 * | Aaa aa aaa aa    |  To left is a diagram of some (1) English and
 * | aaaaaa A aa aaa. |  (2) Chinese text and a (3) photo credit.
 * |                2 |
 * |   #######  c c C |  Upright Latin characters are represented as A and a.
 * |   #######  c c c |  '<' represents a latin character rotated
 * | < #######  c c c |      anti-clockwise 90 degrees.
 * | < #######  c   c |
 * | < #######  .   c |  Upright Chinese characters are represented C and c.
 * | 3 #######      c |
 * +------------------+  NOTA BENE: enum values here should match goodoc.proto
 *
 * If you orient your head so that "up" aligns with Orientation,
 * then the characters will appear "right side up" and readable.
 *
 * In the example above, both the English and Chinese paragraphs are oriented
 * so their "up" is the top of the page (page up). The photo credit is read
 * with one's head turned leftward ("up" is to page left).
 *
 * The values of this enum match the convention of Tesseract's osdetect.h
 */
enum Orientation {
  ORIENTATION_PAGE_UP = 0,
  ORIENTATION_PAGE_RIGHT = 1,
  ORIENTATION_PAGE_DOWN = 2,
  ORIENTATION_PAGE_LEFT = 3,
};
|
||||
|
||||
/**
 * The grapheme clusters within a line of text are laid out logically
 * in this direction, judged when looking at the text line rotated so that
 * its Orientation is "page up".
 *
 * For English text, the writing direction is left-to-right. For the
 * Chinese text in the Orientation example, the writing direction is
 * top-to-bottom.
 */
enum WritingDirection {
  WRITING_DIRECTION_LEFT_TO_RIGHT = 0,
  WRITING_DIRECTION_RIGHT_TO_LEFT = 1,
  WRITING_DIRECTION_TOP_TO_BOTTOM = 2,
};
|
||||
|
||||
/**
 * The text lines are read in the given sequence.
 *
 * In English, the order is top-to-bottom.
 * In Chinese, vertical text lines are read right-to-left. Mongolian is
 * written in vertical columns top to bottom like Chinese, but the lines
 * order left-to-right.
 *
 * Note that only some combinations make sense. For example,
 * WRITING_DIRECTION_LEFT_TO_RIGHT implies TEXTLINE_ORDER_TOP_TO_BOTTOM
 */
enum TextlineOrder {
  TEXTLINE_ORDER_LEFT_TO_RIGHT = 0,
  TEXTLINE_ORDER_RIGHT_TO_LEFT = 1,
  TEXTLINE_ORDER_TOP_TO_BOTTOM = 2,
};
|
||||
|
||||
/**
 * Possible modes for page layout analysis. These *must* be kept in order
 * of decreasing amount of layout analysis to be done, except for OSD_ONLY,
 * so that the inequality test functions below work.
 */
enum PageSegMode {
  PSM_OSD_ONLY = 0,      ///< Orientation and script detection only.
  PSM_AUTO_OSD = 1,      ///< Automatic page segmentation with orientation and
                         ///< script detection. (OSD)
  PSM_AUTO_ONLY = 2,     ///< Automatic page segmentation, but no OSD, or OCR.
  PSM_AUTO = 3,          ///< Fully automatic page segmentation, but no OSD.
  PSM_SINGLE_COLUMN = 4, ///< Assume a single column of text of variable sizes.
  PSM_SINGLE_BLOCK_VERT_TEXT = 5, ///< Assume a single uniform block of
                                  ///< vertically aligned text.
  PSM_SINGLE_BLOCK = 6, ///< Assume a single uniform block of text. (Default.)
  PSM_SINGLE_LINE = 7,  ///< Treat the image as a single text line.
  PSM_SINGLE_WORD = 8,  ///< Treat the image as a single word.
  PSM_CIRCLE_WORD = 9,  ///< Treat the image as a single word in a circle.
  PSM_SINGLE_CHAR = 10, ///< Treat the image as a single character.
  PSM_SPARSE_TEXT =
      11, ///< Find as much text as possible in no particular order.
  PSM_SPARSE_TEXT_OSD = 12, ///< Sparse text with orientation and script det.
  PSM_RAW_LINE = 13, ///< Treat the image as a single text line, bypassing
                     ///< hacks that are Tesseract-specific.

  PSM_COUNT ///< Number of enum entries.
};

/**
 * Functions that act on a PageSegMode to determine whether components of
 * layout analysis are enabled.
 * *Depend critically on the order of elements of PageSegMode.*
 * NOTE that arg is an int for compatibility with INT_PARAM.
 * They are constexpr so they can also be evaluated at compile time.
 */

/** True if orientation and script detection runs in this mode. */
constexpr bool PSM_OSD_ENABLED(int pageseg_mode) {
  return pageseg_mode <= PSM_AUTO_OSD || pageseg_mode == PSM_SPARSE_TEXT_OSD;
}

/** True for modes up to PSM_AUTO, and for PSM_SPARSE_TEXT_OSD. */
constexpr bool PSM_ORIENTATION_ENABLED(int pageseg_mode) {
  return pageseg_mode <= PSM_AUTO || pageseg_mode == PSM_SPARSE_TEXT_OSD;
}

/** True for the automatic modes PSM_AUTO_OSD through PSM_AUTO. */
constexpr bool PSM_COL_FIND_ENABLED(int pageseg_mode) {
  return pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_AUTO;
}

/** True for either of the two sparse-text modes. */
constexpr bool PSM_SPARSE(int pageseg_mode) {
  return pageseg_mode == PSM_SPARSE_TEXT || pageseg_mode == PSM_SPARSE_TEXT_OSD;
}

/** True for modes PSM_AUTO_OSD through PSM_SINGLE_COLUMN. */
constexpr bool PSM_BLOCK_FIND_ENABLED(int pageseg_mode) {
  return pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_SINGLE_COLUMN;
}

/** True for modes PSM_AUTO_OSD through PSM_SINGLE_BLOCK. */
constexpr bool PSM_LINE_FIND_ENABLED(int pageseg_mode) {
  return pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_SINGLE_BLOCK;
}

/** True for modes PSM_AUTO_OSD through PSM_SINGLE_LINE, and the sparse modes. */
constexpr bool PSM_WORD_FIND_ENABLED(int pageseg_mode) {
  return (pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_SINGLE_LINE) ||
         pageseg_mode == PSM_SPARSE_TEXT || pageseg_mode == PSM_SPARSE_TEXT_OSD;
}
|
||||
|
||||
/**
 * enum of the elements of the page hierarchy, used in ResultIterator
 * to provide functions that operate on each level without having to
 * have 5x as many functions.
 * Enumerators are listed from the coarsest level to the finest.
 */
enum PageIteratorLevel {
  RIL_BLOCK,    // Block of text/image/separator line.
  RIL_PARA,     // Paragraph within a block.
  RIL_TEXTLINE, // Line within a paragraph.
  RIL_WORD,     // Word within a textline.
  RIL_SYMBOL    // Symbol/character within a word.
};
|
||||
|
||||
/**
 * JUSTIFICATION_UNKNOWN
 *   The alignment is not clearly one of the other options. This could happen
 *   for example if there are only one or two lines of text or the text looks
 *   like source code or poetry.
 *
 * NOTA BENE: Fully justified paragraphs (text aligned to both left and right
 *   margins) are marked by Tesseract with JUSTIFICATION_LEFT if their text
 *   is written with a left-to-right script and with JUSTIFICATION_RIGHT if
 *   their text is written in a right-to-left script.
 *
 * Interpretation for text read in vertical lines:
 *   "Left" is wherever the starting reading position is.
 *
 * JUSTIFICATION_LEFT
 *   Each line, except possibly the first, is flush to the same left tab stop.
 *
 * JUSTIFICATION_CENTER
 *   The text lines of the paragraph are centered about a line going
 *   down through their middle of the text lines.
 *
 * JUSTIFICATION_RIGHT
 *   Each line, except possibly the first, is flush to the same right tab stop.
 */
enum ParagraphJustification {
  JUSTIFICATION_UNKNOWN,
  JUSTIFICATION_LEFT,
  JUSTIFICATION_CENTER,
  JUSTIFICATION_RIGHT,
};
|
||||
|
||||
/**
 * When Tesseract/Cube is initialized we can choose to instantiate/load/run
 * only the Tesseract part, only the Cube part or both along with the combiner.
 * The preference of which engine to use is stored in tessedit_ocr_engine_mode.
 *
 * ATTENTION: When modifying this enum, please make sure to make the
 * appropriate changes to all the enums mirroring it (e.g. OCREngine in
 * cityblock/workflow/detection/detection_storage.proto). Such enums will
 * mention the connection to OcrEngineMode in the comments.
 *
 * NOTE(review): the enumerators below name the Tesseract and LSTM engines,
 * not Cube, so the first paragraph looks historical - confirm.
 */
enum OcrEngineMode {
  OEM_TESSERACT_ONLY,          // Run Tesseract only - fastest; deprecated
  OEM_LSTM_ONLY,               // Run just the LSTM line recognizer.
  OEM_TESSERACT_LSTM_COMBINED, // Run the LSTM recognizer, but allow fallback
                               // to Tesseract when things get difficult.
                               // deprecated
  OEM_DEFAULT,                 // Specify this mode when calling init_*(),
                               // to indicate that any of the above modes
                               // should be automatically inferred from the
                               // variables in the language-specific config,
                               // command-line configs, or if not specified
                               // in any of the above should be set to the
                               // default OEM_TESSERACT_ONLY.
                               // NOTE(review): that fallback is itself marked
                               // deprecated above - confirm the real default.
  OEM_COUNT                    // Number of OEMs
};
|
||||
|
||||
} // namespace tesseract.
|
||||
|
||||
#endif // TESSERACT_CCSTRUCT_PUBLICTYPES_H_
|
|
@ -1,311 +0,0 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: renderer.h
|
||||
// Description: Rendering interface to inject into TessBaseAPI
|
||||
//
|
||||
// (C) Copyright 2011, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef TESSERACT_API_RENDERER_H_
|
||||
#define TESSERACT_API_RENDERER_H_
|
||||
|
||||
#include "export.h"
|
||||
|
||||
// To avoid collision with other typenames include the ABSOLUTE MINIMUM
|
||||
// complexity of includes here. Use forward declarations wherever possible
|
||||
// and hide includes of complex types in baseapi.cpp.
|
||||
#include <cstdint>
|
||||
#include <string> // for std::string
|
||||
#include <vector> // for std::vector
|
||||
|
||||
struct Pix;
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
class TessBaseAPI;
|
||||
|
||||
/**
|
||||
* Interface for rendering tesseract results into a document, such as text,
|
||||
* HOCR or pdf. This class is abstract. Specific classes handle individual
|
||||
* formats. This interface is then used to inject the renderer class into
|
||||
* tesseract when processing images.
|
||||
*
|
||||
* For simplicity implementing this with tesseract version 3.01,
|
||||
* the renderer contains document state that is cleared from document
|
||||
* to document just as the TessBaseAPI is. This way the base API can just
|
||||
* delegate its rendering functionality to injected renderers, and the
|
||||
* renderers can manage the associated state needed for the specific formats
|
||||
* in addition to the heuristics for producing it.
|
||||
*/
|
||||
class TESS_API TessResultRenderer {
|
||||
public:
|
||||
virtual ~TessResultRenderer();
|
||||
|
||||
// Takes ownership of pointer so must be new'd instance.
|
||||
// Renderers aren't ordered, but appends the sequences of next parameter
|
||||
// and existing next(). The renderers should be unique across both lists.
|
||||
void insert(TessResultRenderer *next);
|
||||
|
||||
// Returns the next renderer or nullptr.
|
||||
TessResultRenderer *next() {
|
||||
return next_;
|
||||
}
|
||||
|
||||
/**
|
||||
* Starts a new document with the given title.
|
||||
* This clears the contents of the output data.
|
||||
* Title should use UTF-8 encoding.
|
||||
*/
|
||||
bool BeginDocument(const char *title);
|
||||
|
||||
/**
|
||||
* Adds the recognized text from the source image to the current document.
|
||||
* Invalid if BeginDocument not yet called.
|
||||
*
|
||||
* Note that this API is a bit weird but is designed to fit into the
|
||||
* current TessBaseAPI implementation where the api has lots of state
|
||||
* information that we might want to add in.
|
||||
*/
|
||||
bool AddImage(TessBaseAPI *api);
|
||||
|
||||
/**
|
||||
* Finishes the document and finalizes the output data
|
||||
* Invalid if BeginDocument not yet called.
|
||||
*/
|
||||
bool EndDocument();
|
||||
|
||||
const char *file_extension() const {
|
||||
return file_extension_;
|
||||
}
|
||||
const char *title() const {
|
||||
return title_.c_str();
|
||||
}
|
||||
|
||||
// Is everything fine? Otherwise something went wrong.
|
||||
bool happy() const {
|
||||
return happy_;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the index of the last image given to AddImage
|
||||
* (i.e. images are incremented whether the image succeeded or not)
|
||||
*
|
||||
* This is always defined. It means either the number of the
|
||||
* current image, the last image ended, or in the completed document
|
||||
* depending on when in the document lifecycle you are looking at it.
|
||||
* Will return -1 if a document was never started.
|
||||
*/
|
||||
int imagenum() const {
|
||||
return imagenum_;
|
||||
}
|
||||
|
||||
protected:
|
||||
/**
|
||||
* Called by concrete classes.
|
||||
*
|
||||
* outputbase is the name of the output file excluding
|
||||
* extension. For example, "/path/to/chocolate-chip-cookie-recipe"
|
||||
*
|
||||
* extension indicates the file extension to be used for output
|
||||
* files. For example "pdf" will produce a .pdf file, and "hocr"
|
||||
* will produce .hocr files.
|
||||
*/
|
||||
TessResultRenderer(const char *outputbase, const char *extension);
|
||||
|
||||
// Hook for specialized handling in BeginDocument()
|
||||
virtual bool BeginDocumentHandler();
|
||||
|
||||
// This must be overridden to render the OCR'd results
|
||||
virtual bool AddImageHandler(TessBaseAPI *api) = 0;
|
||||
|
||||
// Hook for specialized handling in EndDocument()
|
||||
virtual bool EndDocumentHandler();
|
||||
|
||||
// Renderers can call this to append '\0' terminated strings into
|
||||
// the output string returned by GetOutput.
|
||||
// This method will grow the output buffer if needed.
|
||||
void AppendString(const char *s);
|
||||
|
||||
// Renderers can call this to append binary byte sequences into
|
||||
// the output string returned by GetOutput. Note that s is not necessarily
|
||||
// '\0' terminated (and can contain '\0' within it).
|
||||
// This method will grow the output buffer if needed.
|
||||
void AppendData(const char *s, int len);
|
||||
|
||||
private:
|
||||
TessResultRenderer *next_; // Can link multiple renderers together
|
||||
FILE *fout_; // output file pointer
|
||||
const char *file_extension_; // standard extension for generated output
|
||||
std::string title_; // title of document being rendered
|
||||
int imagenum_; // index of last image added
|
||||
bool happy_; // I get grumpy when the disk fills up, etc.
|
||||
};
|
||||
|
||||
/**
|
||||
* Renders tesseract output into a plain UTF-8 text string
|
||||
*/
|
||||
class TESS_API TessTextRenderer : public TessResultRenderer {
|
||||
public:
|
||||
explicit TessTextRenderer(const char *outputbase);
|
||||
|
||||
protected:
|
||||
bool AddImageHandler(TessBaseAPI *api) override;
|
||||
};
|
||||
|
||||
/**
|
||||
* Renders tesseract output into an hocr text string
|
||||
*/
|
||||
class TESS_API TessHOcrRenderer : public TessResultRenderer {
|
||||
public:
|
||||
explicit TessHOcrRenderer(const char *outputbase, bool font_info);
|
||||
explicit TessHOcrRenderer(const char *outputbase);
|
||||
|
||||
protected:
|
||||
bool BeginDocumentHandler() override;
|
||||
bool AddImageHandler(TessBaseAPI *api) override;
|
||||
bool EndDocumentHandler() override;
|
||||
|
||||
private:
|
||||
bool font_info_; // whether to print font information
|
||||
};
|
||||
|
||||
/**
|
||||
* Renders tesseract output into an alto text string
|
||||
*/
|
||||
class TESS_API TessAltoRenderer : public TessResultRenderer {
|
||||
public:
|
||||
explicit TessAltoRenderer(const char *outputbase);
|
||||
|
||||
protected:
|
||||
bool BeginDocumentHandler() override;
|
||||
bool AddImageHandler(TessBaseAPI *api) override;
|
||||
bool EndDocumentHandler() override;
|
||||
|
||||
private:
|
||||
bool begin_document;
|
||||
};
|
||||
|
||||
/**
|
||||
* Renders Tesseract output into a TSV string
|
||||
*/
|
||||
class TESS_API TessTsvRenderer : public TessResultRenderer {
|
||||
public:
|
||||
explicit TessTsvRenderer(const char *outputbase, bool font_info);
|
||||
explicit TessTsvRenderer(const char *outputbase);
|
||||
|
||||
protected:
|
||||
bool BeginDocumentHandler() override;
|
||||
bool AddImageHandler(TessBaseAPI *api) override;
|
||||
bool EndDocumentHandler() override;
|
||||
|
||||
private:
|
||||
bool font_info_; // whether to print font information
|
||||
};
|
||||
|
||||
/**
|
||||
* Renders tesseract output into searchable PDF
|
||||
*/
|
||||
class TESS_API TessPDFRenderer : public TessResultRenderer {
|
||||
public:
|
||||
// datadir is the location of the TESSDATA. We need it because
|
||||
// we load a custom PDF font from this location.
|
||||
TessPDFRenderer(const char *outputbase, const char *datadir,
|
||||
bool textonly = false);
|
||||
|
||||
protected:
|
||||
bool BeginDocumentHandler() override;
|
||||
bool AddImageHandler(TessBaseAPI *api) override;
|
||||
bool EndDocumentHandler() override;
|
||||
|
||||
private:
|
||||
// We don't want to have every image in memory at once,
|
||||
// so we store some metadata as we go along producing
|
||||
// PDFs one page at a time. At the end, that metadata is
|
||||
// used to make everything that isn't easily handled in a
|
||||
// streaming fashion.
|
||||
long int obj_; // counter for PDF objects
|
||||
std::vector<uint64_t> offsets_; // offset of every PDF object in bytes
|
||||
std::vector<long int> pages_; // object number for every /Page object
|
||||
std::string datadir_; // where to find the custom font
|
||||
bool textonly_; // skip images if set
|
||||
// Bookkeeping only. DIY = Do It Yourself.
|
||||
void AppendPDFObjectDIY(size_t objectsize);
|
||||
// Bookkeeping + emit data.
|
||||
void AppendPDFObject(const char *data);
|
||||
// Create the /Contents object for an entire page.
|
||||
char *GetPDFTextObjects(TessBaseAPI *api, double width, double height);
|
||||
// Turn an image into a PDF object. Only transcode if we have to.
|
||||
static bool imageToPDFObj(Pix *pix, const char *filename, long int objnum,
|
||||
char **pdf_object, long int *pdf_object_size,
|
||||
int jpg_quality);
|
||||
};
|
||||
|
||||
/**
|
||||
* Renders tesseract output into a plain UTF-8 text string
|
||||
*/
|
||||
class TESS_API TessUnlvRenderer : public TessResultRenderer {
|
||||
public:
|
||||
explicit TessUnlvRenderer(const char *outputbase);
|
||||
|
||||
protected:
|
||||
bool AddImageHandler(TessBaseAPI *api) override;
|
||||
};
|
||||
|
||||
/**
|
||||
* Renders tesseract output into a plain UTF-8 text string for LSTMBox
|
||||
*/
|
||||
class TESS_API TessLSTMBoxRenderer : public TessResultRenderer {
|
||||
public:
|
||||
explicit TessLSTMBoxRenderer(const char *outputbase);
|
||||
|
||||
protected:
|
||||
bool AddImageHandler(TessBaseAPI *api) override;
|
||||
};
|
||||
|
||||
/**
|
||||
* Renders tesseract output into a plain UTF-8 text string
|
||||
*/
|
||||
class TESS_API TessBoxTextRenderer : public TessResultRenderer {
|
||||
public:
|
||||
explicit TessBoxTextRenderer(const char *outputbase);
|
||||
|
||||
protected:
|
||||
bool AddImageHandler(TessBaseAPI *api) override;
|
||||
};
|
||||
|
||||
/**
|
||||
* Renders tesseract output into a plain UTF-8 text string in WordStr format
|
||||
*/
|
||||
class TESS_API TessWordStrBoxRenderer : public TessResultRenderer {
|
||||
public:
|
||||
explicit TessWordStrBoxRenderer(const char *outputbase);
|
||||
|
||||
protected:
|
||||
bool AddImageHandler(TessBaseAPI *api) override;
|
||||
};
|
||||
|
||||
#ifndef DISABLED_LEGACY_ENGINE
|
||||
|
||||
/**
|
||||
* Renders tesseract output into an osd text string
|
||||
*/
|
||||
class TESS_API TessOsdRenderer : public TessResultRenderer {
|
||||
public:
|
||||
explicit TessOsdRenderer(const char *outputbase);
|
||||
|
||||
protected:
|
||||
bool AddImageHandler(TessBaseAPI *api) override;
|
||||
};
|
||||
|
||||
#endif // ndef DISABLED_LEGACY_ENGINE
|
||||
|
||||
} // namespace tesseract.
|
||||
|
||||
#endif // TESSERACT_API_RENDERER_H_
|
|
@ -1,250 +0,0 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: resultiterator.h
|
||||
// Description: Iterator for tesseract results that is capable of
|
||||
// iterating in proper reading order over Bi Directional
|
||||
// (e.g. mixed Hebrew and English) text.
|
||||
// Author: David Eger
|
||||
//
|
||||
// (C) Copyright 2011, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef TESSERACT_CCMAIN_RESULT_ITERATOR_H_
|
||||
#define TESSERACT_CCMAIN_RESULT_ITERATOR_H_
|
||||
|
||||
#include "export.h" // for TESS_API, TESS_LOCAL
|
||||
#include "ltrresultiterator.h" // for LTRResultIterator
|
||||
#include "publictypes.h" // for PageIteratorLevel
|
||||
#include "unichar.h" // for StrongScriptDirection
|
||||
|
||||
#include <set> // for std::pair
|
||||
#include <vector> // for std::vector
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
class TESS_API ResultIterator : public LTRResultIterator {
|
||||
public:
|
||||
static ResultIterator *StartOfParagraph(const LTRResultIterator &resit);
|
||||
|
||||
/**
|
||||
* ResultIterator is copy constructible!
|
||||
* The default copy constructor works just fine for us.
|
||||
*/
|
||||
~ResultIterator() override = default;
|
||||
|
||||
// ============= Moving around within the page ============.
|
||||
/**
|
||||
* Moves the iterator to point to the start of the page to begin
|
||||
* an iteration.
|
||||
*/
|
||||
void Begin() override;
|
||||
|
||||
/**
|
||||
* Moves to the start of the next object at the given level in the
|
||||
* page hierarchy in the appropriate reading order and returns false if
|
||||
* the end of the page was reached.
|
||||
* NOTE that RIL_SYMBOL will skip non-text blocks, but all other
|
||||
* PageIteratorLevel level values will visit each non-text block once.
|
||||
* Think of non text blocks as containing a single para, with a single line,
|
||||
* with a single imaginary word.
|
||||
* Calls to Next with different levels may be freely intermixed.
|
||||
* This function iterates words in right-to-left scripts correctly, if
|
||||
* the appropriate language has been loaded into Tesseract.
|
||||
*/
|
||||
bool Next(PageIteratorLevel level) override;
|
||||
|
||||
/**
|
||||
* IsAtBeginningOf() returns whether we're at the logical beginning of the
|
||||
* given level. (as opposed to ResultIterator's left-to-right top-to-bottom
|
||||
* order). Otherwise, this acts the same as PageIterator::IsAtBeginningOf().
|
||||
* For a full description, see pageiterator.h
|
||||
*/
|
||||
bool IsAtBeginningOf(PageIteratorLevel level) const override;
|
||||
|
||||
/**
|
||||
* Implement PageIterator's IsAtFinalElement correctly in a BiDi context.
|
||||
* For instance, IsAtFinalElement(RIL_PARA, RIL_WORD) returns whether we
|
||||
* point at the last word in a paragraph. See PageIterator for full comment.
|
||||
*/
|
||||
bool IsAtFinalElement(PageIteratorLevel level,
|
||||
PageIteratorLevel element) const override;
|
||||
|
||||
// ============= Functions that refer to words only ============.
|
||||
// Returns the number of blanks before the current word.
|
||||
int BlanksBeforeWord() const;
|
||||
|
||||
// ============= Accessing data ==============.
|
||||
|
||||
/**
|
||||
* Returns the null terminated UTF-8 encoded text string for the current
|
||||
* object at the given level. Use delete [] to free after use.
|
||||
*/
|
||||
virtual char *GetUTF8Text(PageIteratorLevel level) const;
|
||||
|
||||
/**
|
||||
* Returns the LSTM choices for every LSTM timestep for the current word.
|
||||
*/
|
||||
virtual std::vector<std::vector<std::vector<std::pair<const char *, float>>>>
|
||||
*GetRawLSTMTimesteps() const;
|
||||
virtual std::vector<std::vector<std::pair<const char *, float>>>
|
||||
*GetBestLSTMSymbolChoices() const;
|
||||
|
||||
/**
|
||||
* Return whether the current paragraph's dominant reading direction
|
||||
* is left-to-right (as opposed to right-to-left).
|
||||
*/
|
||||
bool ParagraphIsLtr() const;
|
||||
|
||||
// ============= Exposed only for testing =============.
|
||||
|
||||
/**
|
||||
* Yields the reading order as a sequence of indices and (optional)
|
||||
* meta-marks for a set of words (given left-to-right).
|
||||
* The meta marks are passed as negative values:
|
||||
* kMinorRunStart Start of minor direction text.
|
||||
* kMinorRunEnd End of minor direction text.
|
||||
* kComplexWord The next indexed word contains both left-to-right and
|
||||
* right-to-left characters and was treated as neutral.
|
||||
*
|
||||
* For example, suppose we have five words in a text line,
|
||||
* indexed [0,1,2,3,4] from the leftmost side of the text line.
|
||||
* The following are all believable reading_orders:
|
||||
*
|
||||
* Left-to-Right (in ltr paragraph):
|
||||
* { 0, 1, 2, 3, 4 }
|
||||
* Left-to-Right (in rtl paragraph):
|
||||
* { kMinorRunStart, 0, 1, 2, 3, 4, kMinorRunEnd }
|
||||
* Right-to-Left (in rtl paragraph):
|
||||
* { 4, 3, 2, 1, 0 }
|
||||
* Left-to-Right except for an RTL phrase in words 2, 3 in an ltr paragraph:
|
||||
* { 0, 1, kMinorRunStart, 3, 2, kMinorRunEnd, 4 }
|
||||
*/
|
||||
static void CalculateTextlineOrder(
|
||||
bool paragraph_is_ltr,
|
||||
const std::vector<StrongScriptDirection> &word_dirs,
|
||||
std::vector<int> *reading_order);
|
||||
|
||||
static const int kMinorRunStart;
|
||||
static const int kMinorRunEnd;
|
||||
static const int kComplexWord;
|
||||
|
||||
protected:
|
||||
/**
|
||||
* We presume the data associated with the given iterator will outlive us.
|
||||
* NB: This is private because it does something that is non-obvious:
|
||||
* it resets to the beginning of the paragraph instead of staying wherever
|
||||
* resit might have pointed.
|
||||
*/
|
||||
explicit ResultIterator(const LTRResultIterator &resit);
|
||||
|
||||
private:
|
||||
/**
|
||||
* Calculates the current paragraph's dominant writing direction.
|
||||
* Typically, members should use current_paragraph_ltr_ instead.
|
||||
*/
|
||||
bool CurrentParagraphIsLtr() const;
|
||||
|
||||
/**
|
||||
* Returns word indices as measured from resit->RestartRow() = index 0
|
||||
* for the reading order of words within a textline given an iterator
|
||||
* into the middle of the text line.
|
||||
* In addition to non-negative word indices, the following negative values
|
||||
* may be inserted:
|
||||
* kMinorRunStart Start of minor direction text.
|
||||
* kMinorRunEnd End of minor direction text.
|
||||
* kComplexWord The previous word contains both left-to-right and
|
||||
* right-to-left characters and was treated as neutral.
|
||||
*/
|
||||
void CalculateTextlineOrder(bool paragraph_is_ltr,
|
||||
const LTRResultIterator &resit,
|
||||
std::vector<int> *indices) const;
|
||||
/** Same as above, but the caller's ssd gets filled in if ssd != nullptr. */
|
||||
void CalculateTextlineOrder(bool paragraph_is_ltr,
|
||||
const LTRResultIterator &resit,
|
||||
std::vector<StrongScriptDirection> *ssd,
|
||||
std::vector<int> *indices) const;
|
||||
|
||||
/**
|
||||
* What is the index of the current word in a strict left-to-right reading
|
||||
* of the row?
|
||||
*/
|
||||
int LTRWordIndex() const;
|
||||
|
||||
/**
|
||||
* Given an iterator pointing at a word, returns the logical reading order
|
||||
* of blob indices for the word.
|
||||
*/
|
||||
void CalculateBlobOrder(std::vector<int> *blob_indices) const;
|
||||
|
||||
/** Precondition: current_paragraph_is_ltr_ is set. */
|
||||
void MoveToLogicalStartOfTextline();
|
||||
|
||||
/**
|
||||
* Precondition: current_paragraph_is_ltr_ and in_minor_direction_
|
||||
* are set.
|
||||
*/
|
||||
void MoveToLogicalStartOfWord();
|
||||
|
||||
/** Are we pointing at the final (reading order) symbol of the word? */
|
||||
bool IsAtFinalSymbolOfWord() const;
|
||||
|
||||
/** Are we pointing at the first (reading order) symbol of the word? */
|
||||
bool IsAtFirstSymbolOfWord() const;
|
||||
|
||||
/**
|
||||
* Append any extra marks that should be appended to this word when printed.
|
||||
* Mostly, these are Unicode BiDi control characters.
|
||||
*/
|
||||
void AppendSuffixMarks(std::string *text) const;
|
||||
|
||||
/** Appends the current word in reading order to the given buffer.*/
|
||||
void AppendUTF8WordText(std::string *text) const;
|
||||
|
||||
/**
|
||||
* Appends the text of the current text line, *assuming this iterator is
|
||||
* positioned at the beginning of the text line*. This function
|
||||
* updates the iterator to point to the first position past the text line.
|
||||
* Each textline is terminated in a single newline character.
|
||||
* If the textline ends a paragraph, it gets a second terminal newline.
|
||||
*/
|
||||
void IterateAndAppendUTF8TextlineText(std::string *text);
|
||||
|
||||
/**
|
||||
* Appends the text of the current paragraph in reading order
|
||||
* to the given buffer.
|
||||
* Each textline is terminated in a single newline character, and the
|
||||
* paragraph gets an extra newline at the end.
|
||||
*/
|
||||
void AppendUTF8ParagraphText(std::string *text) const;
|
||||
|
||||
/** Returns whether the bidi_debug flag is set to at least min_level. */
|
||||
bool BidiDebug(int min_level) const;
|
||||
|
||||
bool current_paragraph_is_ltr_;
|
||||
|
||||
/**
|
||||
* Is the currently pointed-at character at the beginning of
|
||||
* a minor-direction run?
|
||||
*/
|
||||
bool at_beginning_of_minor_run_;
|
||||
|
||||
/** Is the currently pointed-at character in a minor-direction sequence? */
|
||||
bool in_minor_direction_;
|
||||
|
||||
/**
|
||||
* Should detected inter-word spaces be preserved, or "compressed" to a single
|
||||
* space character (default behavior).
|
||||
*/
|
||||
bool preserve_interword_spaces_;
|
||||
};
|
||||
|
||||
} // namespace tesseract.
|
||||
|
||||
#endif // TESSERACT_CCMAIN_RESULT_ITERATOR_H_
|
|
@ -1,174 +0,0 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: unichar.h
|
||||
// Description: Unicode character/ligature class.
|
||||
// Author: Ray Smith
|
||||
//
|
||||
// (C) Copyright 2006, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef TESSERACT_CCUTIL_UNICHAR_H_
|
||||
#define TESSERACT_CCUTIL_UNICHAR_H_
|
||||
|
||||
#include "export.h"
|
||||
|
||||
#include <memory.h>
|
||||
#include <cstring>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Maximum number of characters that can be stored in a UNICHAR. Must be
// at least 4. Must not exceed 31 without changing the coding of length.
#define UNICHAR_LEN 30
// Enforce the documented bounds at compile time so that an out-of-range edit
// of UNICHAR_LEN is rejected instead of silently breaking the length coding.
static_assert(UNICHAR_LEN >= 4 && UNICHAR_LEN <= 31,
              "UNICHAR_LEN must be in the range [4, 31]");

// A UNICHAR_ID is the unique id of a unichar.
using UNICHAR_ID = int;

// A variable to indicate an invalid or uninitialized unichar id.
static const int INVALID_UNICHAR_ID = -1;
// A special unichar that corresponds to INVALID_UNICHAR_ID.
static const char INVALID_UNICHAR[] = "__INVALID_UNICHAR__";
|
||||
|
||||
// Classifies a piece of text by the directionality of the characters it
// contains.
enum StrongScriptDirection {
  DIR_NEUTRAL = 0,       // Text contains only neutral characters.
  DIR_LEFT_TO_RIGHT = 1, // Text contains no Right-to-Left characters.
  DIR_RIGHT_TO_LEFT = 2, // Text contains no Left-to-Right characters.
  DIR_MIX = 3,           // Text contains a mixture of left-to-right
                         // and right-to-left characters.
};

// Signed integer type used to hold a single Unicode code point.
using char32 = signed int;
|
||||
|
||||
// The UNICHAR class holds a single classification result. This may be
|
||||
// a single Unicode character (stored as between 1 and 4 utf8 bytes) or
|
||||
// multiple Unicode characters representing the NFKC expansion of a ligature
|
||||
// such as fi, ffl etc. These are also stored as utf8.
|
||||
class TESS_API UNICHAR {
|
||||
public:
|
||||
UNICHAR() {
|
||||
memset(chars, 0, UNICHAR_LEN);
|
||||
}
|
||||
|
||||
// Construct from a utf8 string. If len<0 then the string is null terminated.
|
||||
// If the string is too long to fit in the UNICHAR then it takes only what
|
||||
// will fit.
|
||||
UNICHAR(const char *utf8_str, int len);
|
||||
|
||||
// Construct from a single UCS4 character.
|
||||
explicit UNICHAR(int unicode);
|
||||
|
||||
// Default copy constructor and operator= are OK.
|
||||
|
||||
// Get the first character as UCS-4.
|
||||
int first_uni() const;
|
||||
|
||||
// Get the length of the UTF8 string.
|
||||
int utf8_len() const {
|
||||
int len = chars[UNICHAR_LEN - 1];
|
||||
return len >= 0 && len < UNICHAR_LEN ? len : UNICHAR_LEN;
|
||||
}
|
||||
|
||||
// Get a UTF8 string, but NOT nullptr terminated.
|
||||
const char *utf8() const {
|
||||
return chars;
|
||||
}
|
||||
|
||||
// Get a terminated UTF8 string: Must delete[] it after use.
|
||||
char *utf8_str() const;
|
||||
|
||||
// Get the number of bytes in the first character of the given utf8 string.
|
||||
static int utf8_step(const char *utf8_str);
|
||||
|
||||
// A class to simplify iterating over and accessing elements of a UTF8
|
||||
// string. Note that unlike the UNICHAR class, const_iterator does NOT COPY or
|
||||
// take ownership of the underlying byte array. It also does not permit
|
||||
// modification of the array (as the name suggests).
|
||||
//
|
||||
// Example:
|
||||
// for (UNICHAR::const_iterator it = UNICHAR::begin(str, str_len);
|
||||
// it != UNICHAR::end(str, len);
|
||||
// ++it) {
|
||||
// printf("UCS-4 symbol code = %d\n", *it);
|
||||
// char buf[5];
|
||||
// int char_len = it.get_utf8(buf); buf[char_len] = '\0';
|
||||
// printf("Char = %s\n", buf);
|
||||
// }
|
||||
class TESS_API const_iterator {
|
||||
using CI = const_iterator;
|
||||
|
||||
public:
|
||||
// Step to the next UTF8 character.
|
||||
// If the current position is at an illegal UTF8 character, then print an
|
||||
// error message and step by one byte. If the current position is at a
|
||||
// nullptr value, don't step past it.
|
||||
const_iterator &operator++();
|
||||
|
||||
// Return the UCS-4 value at the current position.
|
||||
// If the current position is at an illegal UTF8 value, return a single
|
||||
// space character.
|
||||
int operator*() const;
|
||||
|
||||
// Store the UTF-8 encoding of the current codepoint into buf, which must be
|
||||
// at least 4 bytes long. Return the number of bytes written.
|
||||
// If the current position is at an illegal UTF8 value, writes a single
|
||||
// space character and returns 1.
|
||||
// Note that this method does not null-terminate the buffer.
|
||||
int get_utf8(char *buf) const;
|
||||
// Returns the number of bytes of the current codepoint. Returns 1 if the
|
||||
// current position is at an illegal UTF8 value.
|
||||
int utf8_len() const;
|
||||
// Returns true if the UTF-8 encoding at the current position is legal.
|
||||
bool is_legal() const;
|
||||
|
||||
// Return the pointer into the string at the current position.
|
||||
const char *utf8_data() const {
|
||||
return it_;
|
||||
}
|
||||
|
||||
// Iterator equality operators.
|
||||
friend bool operator==(const CI &lhs, const CI &rhs) {
|
||||
return lhs.it_ == rhs.it_;
|
||||
}
|
||||
friend bool operator!=(const CI &lhs, const CI &rhs) {
|
||||
return !(lhs == rhs);
|
||||
}
|
||||
|
||||
private:
|
||||
friend class UNICHAR;
|
||||
explicit const_iterator(const char *it) : it_(it) {}
|
||||
|
||||
const char *it_; // Pointer into the string.
|
||||
};
|
||||
|
||||
// Create a start/end iterator pointing to a string. Note that these methods
|
||||
// are static and do NOT create a copy or take ownership of the underlying
|
||||
// array.
|
||||
static const_iterator begin(const char *utf8_str, int byte_length);
|
||||
static const_iterator end(const char *utf8_str, int byte_length);
|
||||
|
||||
// Converts a utf-8 string to a vector of unicodes.
|
||||
// Returns an empty vector if the input contains invalid UTF-8.
|
||||
static std::vector<char32> UTF8ToUTF32(const char *utf8_str);
|
||||
// Converts a vector of unicodes to a utf8 string.
|
||||
// Returns an empty string if the input contains an invalid unicode.
|
||||
static std::string UTF32ToUTF8(const std::vector<char32> &str32);
|
||||
|
||||
private:
|
||||
// A UTF-8 representation of 1 or more Unicode characters.
|
||||
// The last element (chars[UNICHAR_LEN - 1]) is a length if
|
||||
// its value < UNICHAR_LEN, otherwise it is a genuine character.
|
||||
char chars[UNICHAR_LEN]{};
|
||||
};
|
||||
|
||||
} // namespace tesseract
|
||||
|
||||
#endif // TESSERACT_CCUTIL_UNICHAR_H_
|
|
@ -1,34 +0,0 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: version.h
|
||||
// Description: Version information
|
||||
//
|
||||
// (C) Copyright 2018, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef TESSERACT_API_VERSION_H_
|
||||
#define TESSERACT_API_VERSION_H_
|
||||
|
||||
// clang-format off
|
||||
|
||||
// Individual version components.
// NOTE(review): the @...@ tokens look like configure-time placeholders that
// the build system replaces with numbers - confirm against the build scripts.
#define TESSERACT_MAJOR_VERSION @GENERIC_MAJOR_VERSION@
#define TESSERACT_MINOR_VERSION @GENERIC_MINOR_VERSION@
#define TESSERACT_MICRO_VERSION @GENERIC_MICRO_VERSION@

// Single comparable integer: major shifted left by 16 bits, minor shifted
// left by 8 bits, OR-ed with micro. Each component is parenthesised so the
// result does not depend on how the component macros expand.
#define TESSERACT_VERSION               \
  ((TESSERACT_MAJOR_VERSION) << 16 |    \
   (TESSERACT_MINOR_VERSION) << 8 |     \
   (TESSERACT_MICRO_VERSION))

// Human-readable version string.
#define TESSERACT_VERSION_STR "@PACKAGE_VERSION@"
|
||||
|
||||
// clang-format on
|
||||
|
||||
#endif // TESSERACT_API_VERSION_H_
|
|
@ -1,812 +0,0 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: baseapi.h
|
||||
// Description: Simple API for calling tesseract.
|
||||
// Author: Ray Smith
|
||||
//
|
||||
// (C) Copyright 2006, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef TESSERACT_API_BASEAPI_H_
|
||||
#define TESSERACT_API_BASEAPI_H_
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
# include "config_auto.h" // DISABLED_LEGACY_ENGINE
|
||||
#endif
|
||||
|
||||
#include "export.h"
|
||||
#include "pageiterator.h"
|
||||
#include "publictypes.h"
|
||||
#include "resultiterator.h"
|
||||
#include "unichar.h"
|
||||
|
||||
#include "version.h"
|
||||
|
||||
#include <cstdio>
|
||||
#include <vector> // for std::vector
|
||||
|
||||
struct Pix;
|
||||
struct Pixa;
|
||||
struct Boxa;
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
class PAGE_RES;
|
||||
class ParagraphModel;
|
||||
class BLOCK_LIST;
|
||||
class ETEXT_DESC;
|
||||
struct OSResults;
|
||||
class UNICHARSET;
|
||||
|
||||
class Dawg;
|
||||
class Dict;
|
||||
class EquationDetect;
|
||||
class PageIterator;
|
||||
class ImageThresholder;
|
||||
class LTRResultIterator;
|
||||
class ResultIterator;
|
||||
class MutableIterator;
|
||||
class TessResultRenderer;
|
||||
class Tesseract;
|
||||
|
||||
// Function to read a std::vector<char> from a whole file.
|
||||
// Returns false on failure.
|
||||
using FileReader = bool (*)(const char *filename, std::vector<char> *data);
|
||||
|
||||
using DictFunc = int (Dict::*)(void *, const UNICHARSET &, UNICHAR_ID,
|
||||
bool) const;
|
||||
using ProbabilityInContextFunc = double (Dict::*)(const char *, const char *,
|
||||
int, const char *, int);
|
||||
|
||||
/**
|
||||
* Base class for all tesseract APIs.
|
||||
* Specific classes can add ability to work on different inputs or produce
|
||||
* different outputs.
|
||||
* This class is mostly an interface layer on top of the Tesseract instance
|
||||
* class to hide the data types so that users of this class don't have to
|
||||
* include any other Tesseract headers.
|
||||
*/
|
||||
class TESS_API TessBaseAPI {
|
||||
public:
|
||||
TessBaseAPI();
|
||||
virtual ~TessBaseAPI();
|
||||
// Copy constructor and assignment operator are currently unsupported.
|
||||
TessBaseAPI(TessBaseAPI const &) = delete;
|
||||
TessBaseAPI &operator=(TessBaseAPI const &) = delete;
|
||||
|
||||
/**
|
||||
* Returns the version identifier as a static string. Do not delete.
|
||||
*/
|
||||
static const char *Version();
|
||||
|
||||
/**
|
||||
* If compiled with OpenCL AND an available OpenCL
|
||||
* device is deemed faster than serial code, then
|
||||
* "device" is populated with the cl_device_id
|
||||
* and returns sizeof(cl_device_id)
|
||||
* otherwise *device=nullptr and returns 0.
|
||||
*/
|
||||
static size_t getOpenCLDevice(void **device);
|
||||
|
||||
/**
|
||||
* Set the name of the input file. Needed for training and
|
||||
* reading a UNLV zone file, and for searchable PDF output.
|
||||
*/
|
||||
void SetInputName(const char *name);
|
||||
/**
|
||||
* These functions are required for searchable PDF output.
|
||||
* We need our hands on the input file so that we can include
|
||||
* it in the PDF without transcoding. If that is not possible,
|
||||
* we need the original image. Finally, resolution metadata
|
||||
* is stored in the PDF so we need that as well.
|
||||
*/
|
||||
const char *GetInputName();
|
||||
// Takes ownership of the input pix.
|
||||
void SetInputImage(Pix *pix);
|
||||
Pix *GetInputImage();
|
||||
int GetSourceYResolution();
|
||||
const char *GetDatapath();
|
||||
|
||||
/** Set the name of the bonus output files. Needed only for debugging. */
|
||||
void SetOutputName(const char *name);
|
||||
|
||||
/**
|
||||
* Set the value of an internal "parameter."
|
||||
* Supply the name of the parameter and the value as a string, just as
|
||||
* you would in a config file.
|
||||
* Returns false if the name lookup failed.
|
||||
* Eg SetVariable("tessedit_char_blacklist", "xyz"); to ignore x, y and z.
|
||||
* Or SetVariable("classify_bln_numeric_mode", "1"); to set numeric-only mode.
|
||||
* SetVariable may be used before Init, but settings will revert to
|
||||
* defaults on End().
|
||||
*
|
||||
* Note: Must be called after Init(). Only works for non-init variables
|
||||
* (init variables should be passed to Init()).
|
||||
*/
|
||||
bool SetVariable(const char *name, const char *value);
|
||||
bool SetDebugVariable(const char *name, const char *value);
|
||||
|
||||
/**
|
||||
* Returns true if the parameter was found among Tesseract parameters.
|
||||
* Fills in value with the value of the parameter.
|
||||
*/
|
||||
bool GetIntVariable(const char *name, int *value) const;
|
||||
bool GetBoolVariable(const char *name, bool *value) const;
|
||||
bool GetDoubleVariable(const char *name, double *value) const;
|
||||
|
||||
/**
|
||||
* Returns the pointer to the string that represents the value of the
|
||||
* parameter if it was found among Tesseract parameters.
|
||||
*/
|
||||
const char *GetStringVariable(const char *name) const;
|
||||
|
||||
#ifndef DISABLED_LEGACY_ENGINE
|
||||
|
||||
/**
|
||||
* Print Tesseract fonts table to the given file.
|
||||
*/
|
||||
void PrintFontsTable(FILE *fp) const;
|
||||
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Print Tesseract parameters to the given file.
|
||||
*/
|
||||
void PrintVariables(FILE *fp) const;
|
||||
|
||||
/**
|
||||
* Get value of named variable as a string, if it exists.
|
||||
*/
|
||||
bool GetVariableAsString(const char *name, std::string *val) const;
|
||||
|
||||
/**
|
||||
* Instances are now mostly thread-safe and totally independent,
|
||||
* but some global parameters remain. Basically it is safe to use multiple
|
||||
* TessBaseAPIs in different threads in parallel, UNLESS:
|
||||
* you use SetVariable on some of the Params in classify and textord.
|
||||
* If you do, then the effect will be to change it for all your instances.
|
||||
*
|
||||
* Start tesseract. Returns zero on success and -1 on failure.
|
||||
* NOTE that the only members that may be called before Init are those
|
||||
* listed above here in the class definition.
|
||||
*
|
||||
* The datapath must be the name of the tessdata directory.
|
||||
* The language is (usually) an ISO 639-3 string or nullptr will default to
|
||||
* eng. It is entirely safe (and eventually will be efficient too) to call
|
||||
* Init multiple times on the same instance to change language, or just
|
||||
* to reset the classifier.
|
||||
* The language may be a string of the form [~]<lang>[+[~]<lang>]* indicating
|
||||
* that multiple languages are to be loaded. Eg hin+eng will load Hindi and
|
||||
* English. Languages may specify internally that they want to be loaded
|
||||
* with one or more other languages, so the ~ sign is available to override
|
||||
* that. Eg if hin were set to load eng by default, then hin+~eng would force
|
||||
* loading only hin. The number of loaded languages is limited only by
|
||||
* memory, with the caveat that loading additional languages will impact
|
||||
* both speed and accuracy, as there is more work to do to decide on the
|
||||
* applicable language, and there is more chance of hallucinating incorrect
|
||||
* words.
|
||||
* WARNING: On changing languages, all Tesseract parameters are reset
|
||||
* back to their default values. (Which may vary between languages.)
|
||||
* If you have a rare need to set a Variable that controls
|
||||
* initialization for a second call to Init you should explicitly
|
||||
* call End() and then use SetVariable before Init. This is only a very
|
||||
* rare use case, since there are very few uses that require any parameters
|
||||
* to be set before Init.
|
||||
*
|
||||
* If set_only_non_debug_params is true, only params that do not contain
|
||||
* "debug" in the name will be set.
|
||||
*/
|
||||
int Init(const char *datapath, const char *language, OcrEngineMode mode,
|
||||
char **configs, int configs_size,
|
||||
const std::vector<std::string> *vars_vec,
|
||||
const std::vector<std::string> *vars_values,
|
||||
bool set_only_non_debug_params);
|
||||
int Init(const char *datapath, const char *language, OcrEngineMode oem) {
|
||||
return Init(datapath, language, oem, nullptr, 0, nullptr, nullptr, false);
|
||||
}
|
||||
int Init(const char *datapath, const char *language) {
|
||||
return Init(datapath, language, OEM_DEFAULT, nullptr, 0, nullptr, nullptr,
|
||||
false);
|
||||
}
|
||||
// In-memory version reads the traineddata file directly from the given
|
||||
// data[data_size] array, and/or reads data via a FileReader.
|
||||
int Init(const char *data, int data_size, const char *language,
|
||||
OcrEngineMode mode, char **configs, int configs_size,
|
||||
const std::vector<std::string> *vars_vec,
|
||||
const std::vector<std::string> *vars_values,
|
||||
bool set_only_non_debug_params, FileReader reader);
|
||||
|
||||
/**
|
||||
* Returns the languages string used in the last valid initialization.
|
||||
* If the last initialization specified "deu+hin" then that will be
|
||||
* returned. If hin loaded eng automatically as well, then that will
|
||||
* not be included in this list. To find the languages actually
|
||||
* loaded use GetLoadedLanguagesAsVector.
|
||||
* The returned string should NOT be deleted.
|
||||
*/
|
||||
const char *GetInitLanguagesAsString() const;
|
||||
|
||||
/**
|
||||
* Returns the loaded languages in the vector of std::string.
|
||||
* Includes all languages loaded by the last Init, including those loaded
|
||||
* as dependencies of other loaded languages.
|
||||
*/
|
||||
void GetLoadedLanguagesAsVector(std::vector<std::string> *langs) const;
|
||||
|
||||
/**
|
||||
* Returns the available languages in the sorted vector of std::string.
|
||||
*/
|
||||
void GetAvailableLanguagesAsVector(std::vector<std::string> *langs) const;
|
||||
|
||||
/**
|
||||
* Init only for page layout analysis. Use only for calls to SetImage and
|
||||
* AnalysePage. Calls that attempt recognition will generate an error.
|
||||
*/
|
||||
void InitForAnalysePage();
|
||||
|
||||
/**
|
||||
* Read a "config" file containing a set of param, value pairs.
|
||||
* Searches the standard places: tessdata/configs, tessdata/tessconfigs
|
||||
* and also accepts a relative or absolute path name.
|
||||
* Note: only non-init params will be set (init params are set by Init()).
|
||||
*/
|
||||
void ReadConfigFile(const char *filename);
|
||||
/** Same as above, but only set debug params from the given config file. */
|
||||
void ReadDebugConfigFile(const char *filename);
|
||||
|
||||
/**
|
||||
* Set the current page segmentation mode. Defaults to PSM_SINGLE_BLOCK.
|
||||
* The mode is stored as an IntParam so it can also be modified by
|
||||
* ReadConfigFile or SetVariable("tessedit_pageseg_mode", mode as string).
|
||||
*/
|
||||
void SetPageSegMode(PageSegMode mode);
|
||||
|
||||
/** Return the current page segmentation mode. */
|
||||
PageSegMode GetPageSegMode() const;
|
||||
|
||||
/**
|
||||
* Recognize a rectangle from an image and return the result as a string.
|
||||
* May be called many times for a single Init.
|
||||
* Currently has no error checking.
|
||||
* Greyscale of 8 and color of 24 or 32 bits per pixel may be given.
|
||||
* Palette color images will not work properly and must be converted to
|
||||
* 24 bit.
|
||||
* Binary images of 1 bit per pixel may also be given but they must be
|
||||
* byte packed with the MSB of the first byte being the first pixel, and a
|
||||
* 1 represents WHITE. For binary images set bytes_per_pixel=0.
|
||||
* The recognized text is returned as a char* which is coded
|
||||
* as UTF8 and must be freed with the delete [] operator.
|
||||
*
|
||||
* Note that TesseractRect is the simplified convenience interface.
|
||||
* For advanced uses, use SetImage, (optionally) SetRectangle, Recognize,
|
||||
* and one or more of the Get*Text functions below.
|
||||
*/
|
||||
char *TesseractRect(const unsigned char *imagedata, int bytes_per_pixel,
|
||||
int bytes_per_line, int left, int top, int width,
|
||||
int height);
|
||||
|
||||
/**
|
||||
* Call between pages or documents etc to free up memory and forget
|
||||
* adaptive data.
|
||||
*/
|
||||
void ClearAdaptiveClassifier();
|
||||
|
||||
/**
|
||||
* @defgroup AdvancedAPI Advanced API
|
||||
* The following methods break TesseractRect into pieces, so you can
|
||||
* get hold of the thresholded image, get the text in different formats,
|
||||
* get bounding boxes, confidences etc.
|
||||
*/
|
||||
/* @{ */
|
||||
|
||||
/**
|
||||
* Provide an image for Tesseract to recognize. Format is as
|
||||
* TesseractRect above. Copies the image buffer and converts to Pix.
|
||||
* SetImage clears all recognition results, and sets the rectangle to the
|
||||
* full image, so it may be followed immediately by a GetUTF8Text, and it
|
||||
* will automatically perform recognition.
|
||||
*/
|
||||
void SetImage(const unsigned char *imagedata, int width, int height,
|
||||
int bytes_per_pixel, int bytes_per_line);
|
||||
|
||||
/**
|
||||
* Provide an image for Tesseract to recognize. As with SetImage above,
|
||||
* Tesseract takes its own copy of the image, so it need not persist until
|
||||
* after Recognize.
|
||||
* Pix vs raw, which to use?
|
||||
* Use Pix where possible. Tesseract uses Pix as its internal representation
|
||||
* and it is therefore more efficient to provide a Pix directly.
|
||||
*/
|
||||
void SetImage(Pix *pix);
|
||||
|
||||
/**
|
||||
* Set the resolution of the source image in pixels per inch so font size
|
||||
* information can be calculated in results. Call this after SetImage().
|
||||
*/
|
||||
void SetSourceResolution(int ppi);
|
||||
|
||||
/**
|
||||
* Restrict recognition to a sub-rectangle of the image. Call after SetImage.
|
||||
* Each SetRectangle clears the recognition results so multiple rectangles
|
||||
* can be recognized with the same image.
|
||||
*/
|
||||
void SetRectangle(int left, int top, int width, int height);
|
||||
|
||||
/**
|
||||
* Get a copy of the internal thresholded image from Tesseract.
|
||||
* Caller takes ownership of the Pix and must pixDestroy it.
|
||||
* May be called any time after SetImage, or after TesseractRect.
|
||||
*/
|
||||
Pix *GetThresholdedImage();
|
||||
|
||||
/**
|
||||
* Get the result of page layout analysis as a leptonica-style
|
||||
* Boxa, Pixa pair, in reading order.
|
||||
* Can be called before or after Recognize.
|
||||
*/
|
||||
Boxa *GetRegions(Pixa **pixa);
|
||||
|
||||
/**
|
||||
* Get the textlines as a leptonica-style
|
||||
* Boxa, Pixa pair, in reading order.
|
||||
* Can be called before or after Recognize.
|
||||
* If raw_image is true, then extract from the original image instead of the
|
||||
* thresholded image and pad by raw_padding pixels.
|
||||
* If blockids is not nullptr, the block-id of each line is also returned as
|
||||
* an array of one element per line. delete [] after use. If paraids is not
|
||||
* nullptr, the paragraph-id of each line within its block is also returned as
|
||||
* an array of one element per line. delete [] after use.
|
||||
*/
|
||||
Boxa *GetTextlines(bool raw_image, int raw_padding, Pixa **pixa,
|
||||
int **blockids, int **paraids);
|
||||
/*
|
||||
Helper method to extract from the thresholded image. (most common usage)
|
||||
*/
|
||||
Boxa *GetTextlines(Pixa **pixa, int **blockids) {
  // Forward to the full overload: extract from the thresholded image
  // (raw_image == false), with no padding and no paragraph ids requested.
  return GetTextlines(/*raw_image=*/false, /*raw_padding=*/0, pixa, blockids,
                      /*paraids=*/nullptr);
}
|
||||
|
||||
/**
|
||||
* Get textlines and strips of image regions as a leptonica-style Boxa, Pixa
|
||||
* pair, in reading order. Enables downstream handling of non-rectangular
|
||||
* regions.
|
||||
* Can be called before or after Recognize.
|
||||
* If blockids is not nullptr, the block-id of each line is also returned as
|
||||
* an array of one element per line. delete [] after use.
|
||||
*/
|
||||
Boxa *GetStrips(Pixa **pixa, int **blockids);
|
||||
|
||||
/**
|
||||
* Get the words as a leptonica-style
|
||||
* Boxa, Pixa pair, in reading order.
|
||||
* Can be called before or after Recognize.
|
||||
*/
|
||||
Boxa *GetWords(Pixa **pixa);
|
||||
|
||||
/**
|
||||
* Gets the individual connected (text) components (created
|
||||
* after pages segmentation step, but before recognition)
|
||||
* as a leptonica-style Boxa, Pixa pair, in reading order.
|
||||
* Can be called before or after Recognize.
|
||||
* Note: the caller is responsible for calling boxaDestroy()
|
||||
* on the returned Boxa array and pixaDestroy() on cc array.
|
||||
*/
|
||||
Boxa *GetConnectedComponents(Pixa **cc);
|
||||
|
||||
/**
|
||||
* Get the given level kind of components (block, textline, word etc.) as a
|
||||
* leptonica-style Boxa, Pixa pair, in reading order.
|
||||
* Can be called before or after Recognize.
|
||||
* If blockids is not nullptr, the block-id of each component is also returned
|
||||
* as an array of one element per component. delete [] after use.
|
||||
* If paraids is not nullptr, the paragraph-id of each component with its
|
||||
* block is also returned as an array of one element per component. delete []
|
||||
* after use. If raw_image is true, then portions of the original image are
|
||||
* extracted instead of the thresholded image and padded with raw_padding. If
|
||||
* text_only is true, then only text components are returned.
|
||||
*/
|
||||
Boxa *GetComponentImages(PageIteratorLevel level, bool text_only,
|
||||
bool raw_image, int raw_padding, Pixa **pixa,
|
||||
int **blockids, int **paraids);
|
||||
// Helper function to get binary images with no padding (most common usage).
|
||||
Boxa *GetComponentImages(const PageIteratorLevel level, const bool text_only,
                         Pixa **pixa, int **blockids) {
  // Forward to the full overload: use the thresholded image
  // (raw_image == false), with no padding and no paragraph ids requested.
  return GetComponentImages(level, text_only, /*raw_image=*/false,
                            /*raw_padding=*/0, pixa, blockids,
                            /*paraids=*/nullptr);
}
|
||||
|
||||
/**
|
||||
* Returns the scale factor of the thresholded image that would be returned by
|
||||
* GetThresholdedImage() and the various GetX() methods that call
|
||||
* GetComponentImages().
|
||||
* Returns 0 if no thresholder has been set.
|
||||
*/
|
||||
int GetThresholdedImageScaleFactor() const;
|
||||
|
||||
/**
|
||||
* Runs page layout analysis in the mode set by SetPageSegMode.
|
||||
* May optionally be called prior to Recognize to get access to just
|
||||
* the page layout results. Returns an iterator to the results.
|
||||
* If merge_similar_words is true, words are combined where suitable for use
|
||||
* with a line recognizer. Use if you want to use AnalyseLayout to find the
|
||||
* textlines, and then want to process textline fragments with an external
|
||||
* line recognizer.
|
||||
* Returns nullptr on error or an empty page.
|
||||
* The returned iterator must be deleted after use.
|
||||
* WARNING! This class points to data held within the TessBaseAPI class, and
|
||||
* therefore can only be used while the TessBaseAPI class still exists and
|
||||
* has not been subjected to a call of Init, SetImage, Recognize, Clear, End,
|
||||
* DetectOS, or anything else that changes the internal PAGE_RES.
|
||||
*/
|
||||
PageIterator *AnalyseLayout();
|
||||
PageIterator *AnalyseLayout(bool merge_similar_words);
|
||||
|
||||
/**
|
||||
* Recognize the image from SetAndThresholdImage, generating Tesseract
|
||||
* internal structures. Returns 0 on success.
|
||||
* Optional. The Get*Text functions below will call Recognize if needed.
|
||||
* After Recognize, the output is kept internally until the next SetImage.
|
||||
*/
|
||||
int Recognize(ETEXT_DESC *monitor);
|
||||
|
||||
/**
|
||||
* Methods to retrieve information after SetAndThresholdImage(),
|
||||
* Recognize() or TesseractRect(). (Recognize is called implicitly if needed.)
|
||||
*/
|
||||
|
||||
/**
|
||||
* Turns images into symbolic text.
|
||||
*
|
||||
* filename can point to a single image, a multi-page TIFF,
|
||||
* or a plain text list of image filenames.
|
||||
*
|
||||
* retry_config is useful for debugging. If not nullptr, you can fall
|
||||
* back to an alternate configuration if a page fails for some
|
||||
* reason.
|
||||
*
|
||||
* timeout_millisec terminates processing if any single page
|
||||
* takes too long. Set to 0 for unlimited time.
|
||||
*
|
||||
* renderer is responsible for creating the output. For example,
|
||||
* use the TessTextRenderer if you want plaintext output, or
|
||||
* the TessPDFRenderer to produce searchable PDF.
|
||||
*
|
||||
* If tessedit_page_number is non-negative, will only process that
|
||||
* single page. Works for multi-page tiff file, or filelist.
|
||||
*
|
||||
* Returns true if successful, false on error.
|
||||
*/
|
||||
bool ProcessPages(const char *filename, const char *retry_config,
|
||||
int timeout_millisec, TessResultRenderer *renderer);
|
||||
// Does the real work of ProcessPages.
|
||||
bool ProcessPagesInternal(const char *filename, const char *retry_config,
|
||||
int timeout_millisec, TessResultRenderer *renderer);
|
||||
|
||||
/**
|
||||
* Turn a single image into symbolic text.
|
||||
*
|
||||
* The pix is the image processed. filename and page_index are
|
||||
* metadata used by side-effect processes, such as reading a box
|
||||
* file or formatting as hOCR.
|
||||
*
|
||||
* See ProcessPages for descriptions of other parameters.
|
||||
*/
|
||||
bool ProcessPage(Pix *pix, int page_index, const char *filename,
|
||||
const char *retry_config, int timeout_millisec,
|
||||
TessResultRenderer *renderer);
|
||||
|
||||
/**
|
||||
* Get a reading-order iterator to the results of LayoutAnalysis and/or
|
||||
* Recognize. The returned iterator must be deleted after use.
|
||||
* WARNING! This class points to data held within the TessBaseAPI class, and
|
||||
* therefore can only be used while the TessBaseAPI class still exists and
|
||||
* has not been subjected to a call of Init, SetImage, Recognize, Clear, End,
|
||||
* DetectOS, or anything else that changes the internal PAGE_RES.
|
||||
*/
|
||||
ResultIterator *GetIterator();
|
||||
|
||||
/**
|
||||
* Get a mutable iterator to the results of LayoutAnalysis and/or Recognize.
|
||||
* The returned iterator must be deleted after use.
|
||||
* WARNING! This class points to data held within the TessBaseAPI class, and
|
||||
* therefore can only be used while the TessBaseAPI class still exists and
|
||||
* has not been subjected to a call of Init, SetImage, Recognize, Clear, End,
|
||||
* DetectOS, or anything else that changes the internal PAGE_RES.
|
||||
*/
|
||||
MutableIterator *GetMutableIterator();
|
||||
|
||||
/**
|
||||
* The recognized text is returned as a char* which is coded
|
||||
* as UTF8 and must be freed with the delete [] operator.
|
||||
*/
|
||||
char *GetUTF8Text();
|
||||
|
||||
/**
|
||||
* Make a HTML-formatted string with hOCR markup from the internal
|
||||
* data structures.
|
||||
* page_number is 0-based but will appear in the output as 1-based.
|
||||
* monitor can be used to
|
||||
* cancel the recognition
|
||||
* receive progress callbacks
|
||||
* Returned string must be freed with the delete [] operator.
|
||||
*/
|
||||
char *GetHOCRText(ETEXT_DESC *monitor, int page_number);
|
||||
|
||||
/**
|
||||
* Make a HTML-formatted string with hOCR markup from the internal
|
||||
* data structures.
|
||||
* page_number is 0-based but will appear in the output as 1-based.
|
||||
* Returned string must be freed with the delete [] operator.
|
||||
*/
|
||||
char *GetHOCRText(int page_number);
|
||||
|
||||
/**
|
||||
* Make an XML-formatted string with Alto markup from the internal
|
||||
* data structures.
|
||||
*/
|
||||
char *GetAltoText(ETEXT_DESC *monitor, int page_number);
|
||||
|
||||
/**
|
||||
* Make an XML-formatted string with Alto markup from the internal
|
||||
* data structures.
|
||||
*/
|
||||
char *GetAltoText(int page_number);
|
||||
|
||||
/**
|
||||
* Make a TSV-formatted string from the internal data structures.
|
||||
* page_number is 0-based but will appear in the output as 1-based.
|
||||
* Returned string must be freed with the delete [] operator.
|
||||
*/
|
||||
char *GetTSVText(int page_number);
|
||||
|
||||
/**
|
||||
* Make a box file for LSTM training from the internal data structures.
|
||||
* Constructs coordinates in the original image - not just the rectangle.
|
||||
* page_number is a 0-based page index that will appear in the box file.
|
||||
* Returned string must be freed with the delete [] operator.
|
||||
*/
|
||||
char *GetLSTMBoxText(int page_number);
|
||||
|
||||
/**
|
||||
* The recognized text is returned as a char* which is coded in the same
|
||||
* format as a box file used in training.
|
||||
* Constructs coordinates in the original image - not just the rectangle.
|
||||
* page_number is a 0-based page index that will appear in the box file.
|
||||
* Returned string must be freed with the delete [] operator.
|
||||
*/
|
||||
char *GetBoxText(int page_number);
|
||||
|
||||
/**
|
||||
* The recognized text is returned as a char* which is coded in the same
|
||||
* format as a WordStr box file used in training.
|
||||
* page_number is a 0-based page index that will appear in the box file.
|
||||
* Returned string must be freed with the delete [] operator.
|
||||
*/
|
||||
char *GetWordStrBoxText(int page_number);
|
||||
|
||||
/**
|
||||
* The recognized text is returned as a char* which is coded
|
||||
* as UNLV format Latin-1 with specific reject and suspect codes.
|
||||
* Returned string must be freed with the delete [] operator.
|
||||
*/
|
||||
char *GetUNLVText();
|
||||
|
||||
/**
|
||||
* Detect the orientation of the input image and apparent script (alphabet).
|
||||
* orient_deg is the detected clockwise rotation of the input image in degrees
|
||||
* (0, 90, 180, 270)
|
||||
* orient_conf is the confidence (15.0 is reasonably confident)
|
||||
* script_name is an ASCII string, the name of the script, e.g. "Latin"
|
||||
* script_conf is confidence level in the script
|
||||
* Returns true on success and writes values to each parameter as an output
|
||||
*/
|
||||
bool DetectOrientationScript(int *orient_deg, float *orient_conf,
|
||||
const char **script_name, float *script_conf);
|
||||
|
||||
/**
|
||||
* The recognized text is returned as a char* which is coded
|
||||
* as UTF8 and must be freed with the delete [] operator.
|
||||
* page_number is a 0-based page index that will appear in the osd file.
|
||||
*/
|
||||
char *GetOsdText(int page_number);
|
||||
|
||||
/** Returns the (average) confidence value between 0 and 100. */
|
||||
int MeanTextConf();
|
||||
/**
|
||||
* Returns all word confidences (between 0 and 100) in an array, terminated
|
||||
* by -1. The calling function must delete [] after use.
|
||||
* The number of confidences should correspond to the number of space-
|
||||
* delimited words in GetUTF8Text.
|
||||
*/
|
||||
int *AllWordConfidences();
|
||||
|
||||
#ifndef DISABLED_LEGACY_ENGINE
|
||||
/**
|
||||
* Applies the given word to the adaptive classifier if possible.
|
||||
* The word must be SPACE-DELIMITED UTF-8 - l i k e t h i s , so it can
|
||||
* tell the boundaries of the graphemes.
|
||||
* Assumes that SetImage/SetRectangle have been used to set the image
|
||||
* to the given word. The mode arg should be PSM_SINGLE_WORD or
|
||||
* PSM_CIRCLE_WORD, as that will be used to control layout analysis.
|
||||
* The currently set PageSegMode is preserved.
|
||||
* Returns false if adaption was not possible for some reason.
|
||||
*/
|
||||
bool AdaptToWordStr(PageSegMode mode, const char *wordstr);
|
||||
#endif // ndef DISABLED_LEGACY_ENGINE
|
||||
|
||||
/**
|
||||
* Free up recognition results and any stored image data, without actually
|
||||
* freeing any recognition data that would be time-consuming to reload.
|
||||
* Afterwards, you must call SetImage or TesseractRect before doing
|
||||
* any Recognize or Get* operation.
|
||||
*/
|
||||
void Clear();
|
||||
|
||||
/**
|
||||
* Close down tesseract and free up all memory. End() is equivalent to
|
||||
* destructing and reconstructing your TessBaseAPI.
|
||||
* Once End() has been used, none of the other API functions may be used
|
||||
* other than Init and anything declared above it in the class definition.
|
||||
*/
|
||||
void End();
|
||||
|
||||
/**
|
||||
* Clear any library-level memory caches.
|
||||
* There are a variety of expensive-to-load constant data structures (mostly
|
||||
* language dictionaries) that are cached globally -- surviving the Init()
|
||||
* and End() of individual TessBaseAPI's. This function allows the clearing
|
||||
* of these caches.
|
||||
**/
|
||||
static void ClearPersistentCache();
|
||||
|
||||
/**
|
||||
* Check whether a word is valid according to Tesseract's language model
|
||||
* @return 0 if the word is invalid, non-zero if valid.
|
||||
* @warning temporary! This function will be removed from here and placed
|
||||
* in a separate API at some future time.
|
||||
*/
|
||||
int IsValidWord(const char *word) const;
|
||||
// Returns true if utf8_character is defined in the UniCharset.
|
||||
bool IsValidCharacter(const char *utf8_character) const;
|
||||
|
||||
bool GetTextDirection(int *out_offset, float *out_slope);
|
||||
|
||||
/** Sets Dict::letter_is_okay_ function to point to the given function. */
|
||||
void SetDictFunc(DictFunc f);
|
||||
|
||||
/** Sets Dict::probability_in_context_ function to point to the given
|
||||
* function.
|
||||
*/
|
||||
void SetProbabilityInContextFunc(ProbabilityInContextFunc f);
|
||||
|
||||
/**
|
||||
* Estimates the Orientation And Script of the image.
|
||||
* @return true if the image was processed successfully.
|
||||
*/
|
||||
bool DetectOS(OSResults *);
|
||||
|
||||
/**
|
||||
* Return text orientation of each block as determined by an earlier run
|
||||
* of layout analysis.
|
||||
*/
|
||||
void GetBlockTextOrientations(int **block_orientation,
|
||||
bool **vertical_writing);
|
||||
|
||||
/** This method returns the string form of the specified unichar. */
|
||||
const char *GetUnichar(int unichar_id) const;
|
||||
|
||||
/** Return the pointer to the i-th dawg loaded into tesseract_ object. */
|
||||
const Dawg *GetDawg(int i) const;
|
||||
|
||||
/** Return the number of dawgs loaded into tesseract_ object. */
|
||||
int NumDawgs() const;
|
||||
|
||||
/** Returns the tesseract_ pointer (the underlying data object) held by this instance. */
Tesseract *tesseract() const {
|
||||
return tesseract_;
|
||||
}
|
||||
|
||||
/** Returns last_oem_requested_, i.e. the last OCR engine mode requested. */
OcrEngineMode oem() const {
|
||||
return last_oem_requested_;
|
||||
}
|
||||
|
||||
void set_min_orientation_margin(double margin);
|
||||
/* @} */
|
||||
|
||||
protected:
|
||||
/** Common code for setting the image. Returns true if Init has been called.
|
||||
*/
|
||||
bool InternalSetImage();
|
||||
|
||||
/**
|
||||
* Run the thresholder to make the thresholded image. If pix is not nullptr,
|
||||
* the source is thresholded to pix instead of the internal IMAGE.
|
||||
*/
|
||||
virtual bool Threshold(Pix **pix);
|
||||
|
||||
/**
|
||||
* Find lines from the image making the BLOCK_LIST.
|
||||
* @return 0 on success.
|
||||
*/
|
||||
int FindLines();
|
||||
|
||||
/** Delete the pageres and block list ready for a new page. */
|
||||
void ClearResults();
|
||||
|
||||
/**
|
||||
* Return an LTR Result Iterator -- used only for training, as we really want
|
||||
* to ignore all BiDi smarts at that point.
|
||||
* delete once you're done with it.
|
||||
*/
|
||||
LTRResultIterator *GetLTRIterator();
|
||||
|
||||
/**
|
||||
* Return the length of the output text string, as UTF8, assuming
|
||||
* one newline per line and one per block, with a terminator,
|
||||
* and assuming a single character reject marker for each rejected character.
|
||||
* Also return the number of recognized blobs in blob_count.
|
||||
*/
|
||||
int TextLength(int *blob_count) const;
|
||||
|
||||
//// paragraphs.cpp ////////////////////////////////////////////////////
|
||||
void DetectParagraphs(bool after_text_recognition);
|
||||
|
||||
/** Returns a pointer-to-const view of page_res_ (the page-level data). */
const PAGE_RES *GetPageRes() const {
|
||||
return page_res_;
|
||||
}
|
||||
|
||||
protected:
|
||||
Tesseract *tesseract_; ///< The underlying data object.
|
||||
Tesseract *osd_tesseract_; ///< For orientation & script detection.
|
||||
EquationDetect *equ_detect_; ///< The equation detector.
|
||||
FileReader reader_; ///< Reads files from any filesystem.
|
||||
ImageThresholder *thresholder_; ///< Image thresholding module.
|
||||
std::vector<ParagraphModel *> *paragraph_models_;
|
||||
BLOCK_LIST *block_list_; ///< The page layout.
|
||||
PAGE_RES *page_res_; ///< The page-level data.
|
||||
std::string input_file_; ///< Name used by training code.
|
||||
std::string output_file_; ///< Name used by debug code.
|
||||
std::string datapath_; ///< Current location of tessdata.
|
||||
std::string language_; ///< Last initialized language.
|
||||
OcrEngineMode last_oem_requested_; ///< Last ocr language mode requested.
|
||||
bool recognition_done_; ///< page_res_ contains recognition data.
|
||||
|
||||
/**
|
||||
* @defgroup ThresholderParams Thresholder Parameters
|
||||
* Parameters saved from the Thresholder. Needed to rebuild coordinates.
|
||||
*/
|
||||
/* @{ */
|
||||
int rect_left_;
|
||||
int rect_top_;
|
||||
int rect_width_;
|
||||
int rect_height_;
|
||||
int image_width_;
|
||||
int image_height_;
|
||||
/* @} */
|
||||
|
||||
private:
|
||||
// A list of image filenames gets special consideration
|
||||
bool ProcessPagesFileList(FILE *fp, std::string *buf,
|
||||
const char *retry_config, int timeout_millisec,
|
||||
TessResultRenderer *renderer,
|
||||
int tessedit_page_number);
|
||||
// TIFF supports multipage so gets special consideration.
|
||||
bool ProcessPagesMultipageTiff(const unsigned char *data, size_t size,
|
||||
const char *filename, const char *retry_config,
|
||||
int timeout_millisec,
|
||||
TessResultRenderer *renderer,
|
||||
int tessedit_page_number);
|
||||
}; // class TessBaseAPI.
|
||||
|
||||
/** Escape a char string - replace &<>"' with HTML codes. */
|
||||
std::string HOcrEscape(const char *text);
|
||||
|
||||
} // namespace tesseract
|
||||
|
||||
#endif // TESSERACT_API_BASEAPI_H_
|
|
@ -1,484 +0,0 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: capi.h
|
||||
// Description: C-API TessBaseAPI
|
||||
//
|
||||
// (C) Copyright 2012, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef API_CAPI_H_
|
||||
#define API_CAPI_H_
|
||||
|
||||
#include "export.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
# include <tesseract/baseapi.h>
|
||||
# include <tesseract/ocrclass.h>
|
||||
# include <tesseract/pageiterator.h>
|
||||
# include <tesseract/renderer.h>
|
||||
# include <tesseract/resultiterator.h>
|
||||
#endif
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Fallback boolean vocabulary for the C API: BOOL, TRUE and FALSE are defined
 * here only when BOOL is not already defined as a macro. */
#ifndef BOOL
|
||||
# define BOOL int
|
||||
# define TRUE 1
|
||||
# define FALSE 0
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
typedef tesseract::TessResultRenderer TessResultRenderer;
|
||||
typedef tesseract::TessBaseAPI TessBaseAPI;
|
||||
typedef tesseract::PageIterator TessPageIterator;
|
||||
typedef tesseract::ResultIterator TessResultIterator;
|
||||
typedef tesseract::MutableIterator TessMutableIterator;
|
||||
typedef tesseract::ChoiceIterator TessChoiceIterator;
|
||||
typedef tesseract::OcrEngineMode TessOcrEngineMode;
|
||||
typedef tesseract::PageSegMode TessPageSegMode;
|
||||
typedef tesseract::PageIteratorLevel TessPageIteratorLevel;
|
||||
typedef tesseract::Orientation TessOrientation;
|
||||
typedef tesseract::ParagraphJustification TessParagraphJustification;
|
||||
typedef tesseract::WritingDirection TessWritingDirection;
|
||||
typedef tesseract::TextlineOrder TessTextlineOrder;
|
||||
typedef tesseract::PolyBlockType TessPolyBlockType;
|
||||
typedef tesseract::ETEXT_DESC ETEXT_DESC;
|
||||
#else
|
||||
typedef struct TessResultRenderer TessResultRenderer;
|
||||
typedef struct TessBaseAPI TessBaseAPI;
|
||||
typedef struct TessPageIterator TessPageIterator;
|
||||
typedef struct TessResultIterator TessResultIterator;
|
||||
typedef struct TessMutableIterator TessMutableIterator;
|
||||
typedef struct TessChoiceIterator TessChoiceIterator;
|
||||
/* C-only definition of the OCR engine mode enum. When this header is compiled
 * as C++, TessOcrEngineMode is instead a typedef of tesseract::OcrEngineMode.
 * Enumerators have no explicit initializers, so they are numbered from 0 in
 * declaration order.
 * NOTE(review): the order presumably has to match the C++ enum - confirm
 * before reordering. */
typedef enum TessOcrEngineMode {
|
||||
OEM_TESSERACT_ONLY,
|
||||
OEM_LSTM_ONLY,
|
||||
OEM_TESSERACT_LSTM_COMBINED,
|
||||
OEM_DEFAULT
|
||||
} TessOcrEngineMode;
|
||||
/* C-only definition of the page segmentation mode enum. When this header is
 * compiled as C++, TessPageSegMode is instead a typedef of
 * tesseract::PageSegMode. Enumerators are numbered from 0 in declaration
 * order, so PSM_COUNT (declared last) equals the number of modes before it.
 * NOTE(review): the order presumably has to match the C++ enum - confirm
 * before reordering. */
typedef enum TessPageSegMode {
|
||||
PSM_OSD_ONLY,
|
||||
PSM_AUTO_OSD,
|
||||
PSM_AUTO_ONLY,
|
||||
PSM_AUTO,
|
||||
PSM_SINGLE_COLUMN,
|
||||
PSM_SINGLE_BLOCK_VERT_TEXT,
|
||||
PSM_SINGLE_BLOCK,
|
||||
PSM_SINGLE_LINE,
|
||||
PSM_SINGLE_WORD,
|
||||
PSM_CIRCLE_WORD,
|
||||
PSM_SINGLE_CHAR,
|
||||
PSM_SPARSE_TEXT,
|
||||
PSM_SPARSE_TEXT_OSD,
|
||||
PSM_RAW_LINE,
|
||||
PSM_COUNT
|
||||
} TessPageSegMode;
|
||||
/* C-only definition of the iterator level enum. When this header is compiled
 * as C++, TessPageIteratorLevel is instead a typedef of
 * tesseract::PageIteratorLevel. Enumerators are numbered from 0 in
 * declaration order (block, paragraph, text line, word, symbol).
 * NOTE(review): the order presumably has to match the C++ enum - confirm
 * before reordering. */
typedef enum TessPageIteratorLevel {
|
||||
RIL_BLOCK,
|
||||
RIL_PARA,
|
||||
RIL_TEXTLINE,
|
||||
RIL_WORD,
|
||||
RIL_SYMBOL
|
||||
} TessPageIteratorLevel;
|
||||
/* C-only definition of the block type enum. When this header is compiled as
 * C++, TessPolyBlockType is instead a typedef of tesseract::PolyBlockType.
 * Enumerators are numbered from 0 in declaration order, so PT_COUNT (declared
 * last) equals the number of block types before it.
 * NOTE(review): the order presumably has to match the C++ enum - confirm
 * before reordering. */
typedef enum TessPolyBlockType {
|
||||
PT_UNKNOWN,
|
||||
PT_FLOWING_TEXT,
|
||||
PT_HEADING_TEXT,
|
||||
PT_PULLOUT_TEXT,
|
||||
PT_EQUATION,
|
||||
PT_INLINE_EQUATION,
|
||||
PT_TABLE,
|
||||
PT_VERTICAL_TEXT,
|
||||
PT_CAPTION_TEXT,
|
||||
PT_FLOWING_IMAGE,
|
||||
PT_HEADING_IMAGE,
|
||||
PT_PULLOUT_IMAGE,
|
||||
PT_HORZ_LINE,
|
||||
PT_VERT_LINE,
|
||||
PT_NOISE,
|
||||
PT_COUNT
|
||||
} TessPolyBlockType;
|
||||
/* C-only definition of the page orientation enum. When this header is
 * compiled as C++, TessOrientation is instead a typedef of
 * tesseract::Orientation. Enumerators are numbered from 0 in declaration
 * order.
 * NOTE(review): the order presumably has to match the C++ enum - confirm
 * before reordering. */
typedef enum TessOrientation {
|
||||
ORIENTATION_PAGE_UP,
|
||||
ORIENTATION_PAGE_RIGHT,
|
||||
ORIENTATION_PAGE_DOWN,
|
||||
ORIENTATION_PAGE_LEFT
|
||||
} TessOrientation;
|
||||
/* C-only definition of the paragraph justification enum. When this header is
 * compiled as C++, TessParagraphJustification is instead a typedef of
 * tesseract::ParagraphJustification. Enumerators are numbered from 0 in
 * declaration order.
 * NOTE(review): the order presumably has to match the C++ enum - confirm
 * before reordering. */
typedef enum TessParagraphJustification {
|
||||
JUSTIFICATION_UNKNOWN,
|
||||
JUSTIFICATION_LEFT,
|
||||
JUSTIFICATION_CENTER,
|
||||
JUSTIFICATION_RIGHT
|
||||
} TessParagraphJustification;
|
||||
/* C-only definition of the writing direction enum. When this header is
 * compiled as C++, TessWritingDirection is instead a typedef of
 * tesseract::WritingDirection. Enumerators are numbered from 0 in
 * declaration order.
 * NOTE(review): the order presumably has to match the C++ enum - confirm
 * before reordering. */
typedef enum TessWritingDirection {
|
||||
WRITING_DIRECTION_LEFT_TO_RIGHT,
|
||||
WRITING_DIRECTION_RIGHT_TO_LEFT,
|
||||
WRITING_DIRECTION_TOP_TO_BOTTOM
|
||||
} TessWritingDirection;
|
||||
/* C-only definition of the text line order enum. When this header is
 * compiled as C++, TessTextlineOrder is instead a typedef of
 * tesseract::TextlineOrder. Enumerators are numbered from 0 in declaration
 * order.
 * NOTE(review): the order presumably has to match the C++ enum - confirm
 * before reordering. */
typedef enum TessTextlineOrder {
|
||||
TEXTLINE_ORDER_LEFT_TO_RIGHT,
|
||||
TEXTLINE_ORDER_RIGHT_TO_LEFT,
|
||||
TEXTLINE_ORDER_TOP_TO_BOTTOM
|
||||
} TessTextlineOrder;
|
||||
typedef struct ETEXT_DESC ETEXT_DESC;
|
||||
#endif
|
||||
|
||||
typedef bool (*TessCancelFunc)(void *cancel_this, int words);
|
||||
typedef bool (*TessProgressFunc)(ETEXT_DESC *ths, int left, int right, int top,
|
||||
int bottom);
|
||||
|
||||
struct Pix;
|
||||
struct Boxa;
|
||||
struct Pixa;
|
||||
|
||||
/* General free functions */
|
||||
|
||||
TESS_API const char *TessVersion();
|
||||
TESS_API void TessDeleteText(const char *text);
|
||||
TESS_API void TessDeleteTextArray(char **arr);
|
||||
TESS_API void TessDeleteIntArray(const int *arr);
|
||||
|
||||
/* Renderer API */
|
||||
TESS_API TessResultRenderer *TessTextRendererCreate(const char *outputbase);
|
||||
TESS_API TessResultRenderer *TessHOcrRendererCreate(const char *outputbase);
|
||||
TESS_API TessResultRenderer *TessHOcrRendererCreate2(const char *outputbase,
|
||||
BOOL font_info);
|
||||
TESS_API TessResultRenderer *TessAltoRendererCreate(const char *outputbase);
|
||||
TESS_API TessResultRenderer *TessTsvRendererCreate(const char *outputbase);
|
||||
TESS_API TessResultRenderer *TessPDFRendererCreate(const char *outputbase,
|
||||
const char *datadir,
|
||||
BOOL textonly);
|
||||
TESS_API TessResultRenderer *TessUnlvRendererCreate(const char *outputbase);
|
||||
TESS_API TessResultRenderer *TessBoxTextRendererCreate(const char *outputbase);
|
||||
TESS_API TessResultRenderer *TessLSTMBoxRendererCreate(const char *outputbase);
|
||||
TESS_API TessResultRenderer *TessWordStrBoxRendererCreate(
|
||||
const char *outputbase);
|
||||
|
||||
TESS_API void TessDeleteResultRenderer(TessResultRenderer *renderer);
|
||||
TESS_API void TessResultRendererInsert(TessResultRenderer *renderer,
|
||||
TessResultRenderer *next);
|
||||
TESS_API TessResultRenderer *TessResultRendererNext(
|
||||
TessResultRenderer *renderer);
|
||||
TESS_API BOOL TessResultRendererBeginDocument(TessResultRenderer *renderer,
|
||||
const char *title);
|
||||
TESS_API BOOL TessResultRendererAddImage(TessResultRenderer *renderer,
|
||||
TessBaseAPI *api);
|
||||
TESS_API BOOL TessResultRendererEndDocument(TessResultRenderer *renderer);
|
||||
|
||||
TESS_API const char *TessResultRendererExtention(TessResultRenderer *renderer);
|
||||
TESS_API const char *TessResultRendererTitle(TessResultRenderer *renderer);
|
||||
TESS_API int TessResultRendererImageNum(TessResultRenderer *renderer);
|
||||
|
||||
/* Base API */
|
||||
|
||||
TESS_API TessBaseAPI *TessBaseAPICreate();
|
||||
TESS_API void TessBaseAPIDelete(TessBaseAPI *handle);
|
||||
|
||||
TESS_API size_t TessBaseAPIGetOpenCLDevice(TessBaseAPI *handle, void **device);
|
||||
|
||||
TESS_API void TessBaseAPISetInputName(TessBaseAPI *handle, const char *name);
|
||||
TESS_API const char *TessBaseAPIGetInputName(TessBaseAPI *handle);
|
||||
|
||||
TESS_API void TessBaseAPISetInputImage(TessBaseAPI *handle, struct Pix *pix);
|
||||
TESS_API struct Pix *TessBaseAPIGetInputImage(TessBaseAPI *handle);
|
||||
|
||||
TESS_API int TessBaseAPIGetSourceYResolution(TessBaseAPI *handle);
|
||||
TESS_API const char *TessBaseAPIGetDatapath(TessBaseAPI *handle);
|
||||
|
||||
TESS_API void TessBaseAPISetOutputName(TessBaseAPI *handle, const char *name);
|
||||
|
||||
TESS_API BOOL TessBaseAPISetVariable(TessBaseAPI *handle, const char *name,
|
||||
const char *value);
|
||||
TESS_API BOOL TessBaseAPISetDebugVariable(TessBaseAPI *handle, const char *name,
|
||||
const char *value);
|
||||
|
||||
TESS_API BOOL TessBaseAPIGetIntVariable(const TessBaseAPI *handle,
|
||||
const char *name, int *value);
|
||||
TESS_API BOOL TessBaseAPIGetBoolVariable(const TessBaseAPI *handle,
|
||||
const char *name, BOOL *value);
|
||||
TESS_API BOOL TessBaseAPIGetDoubleVariable(const TessBaseAPI *handle,
|
||||
const char *name, double *value);
|
||||
TESS_API const char *TessBaseAPIGetStringVariable(const TessBaseAPI *handle,
|
||||
const char *name);
|
||||
|
||||
TESS_API void TessBaseAPIPrintVariables(const TessBaseAPI *handle, FILE *fp);
|
||||
TESS_API BOOL TessBaseAPIPrintVariablesToFile(const TessBaseAPI *handle,
|
||||
const char *filename);
|
||||
|
||||
TESS_API int TessBaseAPIInit1(TessBaseAPI *handle, const char *datapath,
|
||||
const char *language, TessOcrEngineMode oem,
|
||||
char **configs, int configs_size);
|
||||
TESS_API int TessBaseAPIInit2(TessBaseAPI *handle, const char *datapath,
|
||||
const char *language, TessOcrEngineMode oem);
|
||||
TESS_API int TessBaseAPIInit3(TessBaseAPI *handle, const char *datapath,
|
||||
const char *language);
|
||||
|
||||
TESS_API int TessBaseAPIInit4(TessBaseAPI *handle, const char *datapath,
|
||||
const char *language, TessOcrEngineMode mode,
|
||||
char **configs, int configs_size, char **vars_vec,
|
||||
char **vars_values, size_t vars_vec_size,
|
||||
BOOL set_only_non_debug_params);
|
||||
|
||||
TESS_API int TessBaseAPIInit5(TessBaseAPI *handle, const char *data, int data_size,
|
||||
const char *language, TessOcrEngineMode mode,
|
||||
char **configs, int configs_size, char **vars_vec,
|
||||
char **vars_values, size_t vars_vec_size,
|
||||
BOOL set_only_non_debug_params);
|
||||
|
||||
TESS_API const char *TessBaseAPIGetInitLanguagesAsString(
|
||||
const TessBaseAPI *handle);
|
||||
TESS_API char **TessBaseAPIGetLoadedLanguagesAsVector(
|
||||
const TessBaseAPI *handle);
|
||||
TESS_API char **TessBaseAPIGetAvailableLanguagesAsVector(
|
||||
const TessBaseAPI *handle);
|
||||
|
||||
TESS_API void TessBaseAPIInitForAnalysePage(TessBaseAPI *handle);
|
||||
|
||||
TESS_API void TessBaseAPIReadConfigFile(TessBaseAPI *handle,
|
||||
const char *filename);
|
||||
TESS_API void TessBaseAPIReadDebugConfigFile(TessBaseAPI *handle,
|
||||
const char *filename);
|
||||
|
||||
TESS_API void TessBaseAPISetPageSegMode(TessBaseAPI *handle,
|
||||
TessPageSegMode mode);
|
||||
TESS_API TessPageSegMode TessBaseAPIGetPageSegMode(const TessBaseAPI *handle);
|
||||
|
||||
TESS_API char *TessBaseAPIRect(TessBaseAPI *handle,
|
||||
const unsigned char *imagedata,
|
||||
int bytes_per_pixel, int bytes_per_line,
|
||||
int left, int top, int width, int height);
|
||||
|
||||
TESS_API void TessBaseAPIClearAdaptiveClassifier(TessBaseAPI *handle);
|
||||
|
||||
TESS_API void TessBaseAPISetImage(TessBaseAPI *handle,
|
||||
const unsigned char *imagedata, int width,
|
||||
int height, int bytes_per_pixel,
|
||||
int bytes_per_line);
|
||||
TESS_API void TessBaseAPISetImage2(TessBaseAPI *handle, struct Pix *pix);
|
||||
|
||||
TESS_API void TessBaseAPISetSourceResolution(TessBaseAPI *handle, int ppi);
|
||||
|
||||
TESS_API void TessBaseAPISetRectangle(TessBaseAPI *handle, int left, int top,
|
||||
int width, int height);
|
||||
|
||||
TESS_API struct Pix *TessBaseAPIGetThresholdedImage(TessBaseAPI *handle);
|
||||
TESS_API struct Boxa *TessBaseAPIGetRegions(TessBaseAPI *handle,
|
||||
struct Pixa **pixa);
|
||||
TESS_API struct Boxa *TessBaseAPIGetTextlines(TessBaseAPI *handle,
|
||||
struct Pixa **pixa,
|
||||
int **blockids);
|
||||
TESS_API struct Boxa *TessBaseAPIGetTextlines1(TessBaseAPI *handle,
|
||||
BOOL raw_image, int raw_padding,
|
||||
struct Pixa **pixa,
|
||||
int **blockids, int **paraids);
|
||||
TESS_API struct Boxa *TessBaseAPIGetStrips(TessBaseAPI *handle,
|
||||
struct Pixa **pixa, int **blockids);
|
||||
TESS_API struct Boxa *TessBaseAPIGetWords(TessBaseAPI *handle,
|
||||
struct Pixa **pixa);
|
||||
TESS_API struct Boxa *TessBaseAPIGetConnectedComponents(TessBaseAPI *handle,
|
||||
struct Pixa **cc);
|
||||
TESS_API struct Boxa *TessBaseAPIGetComponentImages(TessBaseAPI *handle,
|
||||
TessPageIteratorLevel level,
|
||||
BOOL text_only,
|
||||
struct Pixa **pixa,
|
||||
int **blockids);
|
||||
TESS_API struct Boxa *TessBaseAPIGetComponentImages1(
|
||||
TessBaseAPI *handle, TessPageIteratorLevel level, BOOL text_only,
|
||||
BOOL raw_image, int raw_padding, struct Pixa **pixa, int **blockids,
|
||||
int **paraids);
|
||||
|
||||
TESS_API int TessBaseAPIGetThresholdedImageScaleFactor(
|
||||
const TessBaseAPI *handle);
|
||||
|
||||
TESS_API TessPageIterator *TessBaseAPIAnalyseLayout(TessBaseAPI *handle);
|
||||
|
||||
TESS_API int TessBaseAPIRecognize(TessBaseAPI *handle, ETEXT_DESC *monitor);
|
||||
|
||||
TESS_API BOOL TessBaseAPIProcessPages(TessBaseAPI *handle, const char *filename,
|
||||
const char *retry_config,
|
||||
int timeout_millisec,
|
||||
TessResultRenderer *renderer);
|
||||
TESS_API BOOL TessBaseAPIProcessPage(TessBaseAPI *handle, struct Pix *pix,
|
||||
int page_index, const char *filename,
|
||||
const char *retry_config,
|
||||
int timeout_millisec,
|
||||
TessResultRenderer *renderer);
|
||||
|
||||
TESS_API TessResultIterator *TessBaseAPIGetIterator(TessBaseAPI *handle);
|
||||
TESS_API TessMutableIterator *TessBaseAPIGetMutableIterator(
|
||||
TessBaseAPI *handle);
|
||||
|
||||
TESS_API char *TessBaseAPIGetUTF8Text(TessBaseAPI *handle);
|
||||
TESS_API char *TessBaseAPIGetHOCRText(TessBaseAPI *handle, int page_number);
|
||||
|
||||
TESS_API char *TessBaseAPIGetAltoText(TessBaseAPI *handle, int page_number);
|
||||
TESS_API char *TessBaseAPIGetTsvText(TessBaseAPI *handle, int page_number);
|
||||
|
||||
TESS_API char *TessBaseAPIGetBoxText(TessBaseAPI *handle, int page_number);
|
||||
TESS_API char *TessBaseAPIGetLSTMBoxText(TessBaseAPI *handle, int page_number);
|
||||
TESS_API char *TessBaseAPIGetWordStrBoxText(TessBaseAPI *handle,
|
||||
int page_number);
|
||||
|
||||
TESS_API char *TessBaseAPIGetUNLVText(TessBaseAPI *handle);
|
||||
TESS_API int TessBaseAPIMeanTextConf(TessBaseAPI *handle);
|
||||
|
||||
TESS_API int *TessBaseAPIAllWordConfidences(TessBaseAPI *handle);
|
||||
|
||||
#ifndef DISABLED_LEGACY_ENGINE
|
||||
TESS_API BOOL TessBaseAPIAdaptToWordStr(TessBaseAPI *handle,
|
||||
TessPageSegMode mode,
|
||||
const char *wordstr);
|
||||
#endif // #ifndef DISABLED_LEGACY_ENGINE
|
||||
|
||||
TESS_API void TessBaseAPIClear(TessBaseAPI *handle);
|
||||
TESS_API void TessBaseAPIEnd(TessBaseAPI *handle);
|
||||
|
||||
TESS_API int TessBaseAPIIsValidWord(TessBaseAPI *handle, const char *word);
|
||||
TESS_API BOOL TessBaseAPIGetTextDirection(TessBaseAPI *handle, int *out_offset,
|
||||
float *out_slope);
|
||||
|
||||
TESS_API const char *TessBaseAPIGetUnichar(TessBaseAPI *handle, int unichar_id);
|
||||
|
||||
TESS_API void TessBaseAPIClearPersistentCache(TessBaseAPI *handle);
|
||||
|
||||
#ifndef DISABLED_LEGACY_ENGINE
|
||||
|
||||
// Call TessDeleteText(*best_script_name) to free memory allocated by this
|
||||
// function
|
||||
TESS_API BOOL TessBaseAPIDetectOrientationScript(TessBaseAPI *handle,
|
||||
int *orient_deg,
|
||||
float *orient_conf,
|
||||
const char **script_name,
|
||||
float *script_conf);
|
||||
#endif // #ifndef DISABLED_LEGACY_ENGINE
|
||||
|
||||
TESS_API void TessBaseAPISetMinOrientationMargin(TessBaseAPI *handle,
|
||||
double margin);
|
||||
|
||||
TESS_API int TessBaseAPINumDawgs(const TessBaseAPI *handle);
|
||||
|
||||
TESS_API TessOcrEngineMode TessBaseAPIOem(const TessBaseAPI *handle);
|
||||
|
||||
TESS_API void TessBaseGetBlockTextOrientations(TessBaseAPI *handle,
|
||||
int **block_orientation,
|
||||
bool **vertical_writing);
|
||||
|
||||
/* Page iterator */
|
||||
|
||||
TESS_API void TessPageIteratorDelete(TessPageIterator *handle);
|
||||
|
||||
TESS_API TessPageIterator *TessPageIteratorCopy(const TessPageIterator *handle);
|
||||
|
||||
TESS_API void TessPageIteratorBegin(TessPageIterator *handle);
|
||||
|
||||
TESS_API BOOL TessPageIteratorNext(TessPageIterator *handle,
|
||||
TessPageIteratorLevel level);
|
||||
|
||||
TESS_API BOOL TessPageIteratorIsAtBeginningOf(const TessPageIterator *handle,
|
||||
TessPageIteratorLevel level);
|
||||
|
||||
TESS_API BOOL TessPageIteratorIsAtFinalElement(const TessPageIterator *handle,
|
||||
TessPageIteratorLevel level,
|
||||
TessPageIteratorLevel element);
|
||||
|
||||
TESS_API BOOL TessPageIteratorBoundingBox(const TessPageIterator *handle,
|
||||
TessPageIteratorLevel level,
|
||||
int *left, int *top, int *right,
|
||||
int *bottom);
|
||||
|
||||
TESS_API TessPolyBlockType
|
||||
TessPageIteratorBlockType(const TessPageIterator *handle);
|
||||
|
||||
TESS_API struct Pix *TessPageIteratorGetBinaryImage(
|
||||
const TessPageIterator *handle, TessPageIteratorLevel level);
|
||||
|
||||
TESS_API struct Pix *TessPageIteratorGetImage(const TessPageIterator *handle,
|
||||
TessPageIteratorLevel level,
|
||||
int padding,
|
||||
struct Pix *original_image,
|
||||
int *left, int *top);
|
||||
|
||||
TESS_API BOOL TessPageIteratorBaseline(const TessPageIterator *handle,
|
||||
TessPageIteratorLevel level, int *x1,
|
||||
int *y1, int *x2, int *y2);
|
||||
|
||||
TESS_API void TessPageIteratorOrientation(
|
||||
TessPageIterator *handle, TessOrientation *orientation,
|
||||
TessWritingDirection *writing_direction, TessTextlineOrder *textline_order,
|
||||
float *deskew_angle);
|
||||
|
||||
TESS_API void TessPageIteratorParagraphInfo(
|
||||
TessPageIterator *handle, TessParagraphJustification *justification,
|
||||
BOOL *is_list_item, BOOL *is_crown, int *first_line_indent);
|
||||
|
||||
/* Result iterator */
|
||||
|
||||
TESS_API void TessResultIteratorDelete(TessResultIterator *handle);
|
||||
TESS_API TessResultIterator *TessResultIteratorCopy(
|
||||
const TessResultIterator *handle);
|
||||
TESS_API TessPageIterator *TessResultIteratorGetPageIterator(
|
||||
TessResultIterator *handle);
|
||||
TESS_API const TessPageIterator *TessResultIteratorGetPageIteratorConst(
|
||||
const TessResultIterator *handle);
|
||||
TESS_API TessChoiceIterator *TessResultIteratorGetChoiceIterator(
|
||||
const TessResultIterator *handle);
|
||||
|
||||
TESS_API BOOL TessResultIteratorNext(TessResultIterator *handle,
|
||||
TessPageIteratorLevel level);
|
||||
TESS_API char *TessResultIteratorGetUTF8Text(const TessResultIterator *handle,
|
||||
TessPageIteratorLevel level);
|
||||
TESS_API float TessResultIteratorConfidence(const TessResultIterator *handle,
|
||||
TessPageIteratorLevel level);
|
||||
TESS_API const char *TessResultIteratorWordRecognitionLanguage(
|
||||
const TessResultIterator *handle);
|
||||
TESS_API const char *TessResultIteratorWordFontAttributes(
|
||||
const TessResultIterator *handle, BOOL *is_bold, BOOL *is_italic,
|
||||
BOOL *is_underlined, BOOL *is_monospace, BOOL *is_serif, BOOL *is_smallcaps,
|
||||
int *pointsize, int *font_id);
|
||||
|
||||
TESS_API BOOL
|
||||
TessResultIteratorWordIsFromDictionary(const TessResultIterator *handle);
|
||||
TESS_API BOOL TessResultIteratorWordIsNumeric(const TessResultIterator *handle);
|
||||
TESS_API BOOL
|
||||
TessResultIteratorSymbolIsSuperscript(const TessResultIterator *handle);
|
||||
TESS_API BOOL
|
||||
TessResultIteratorSymbolIsSubscript(const TessResultIterator *handle);
|
||||
TESS_API BOOL
|
||||
TessResultIteratorSymbolIsDropcap(const TessResultIterator *handle);
|
||||
|
||||
TESS_API void TessChoiceIteratorDelete(TessChoiceIterator *handle);
|
||||
TESS_API BOOL TessChoiceIteratorNext(TessChoiceIterator *handle);
|
||||
TESS_API const char *TessChoiceIteratorGetUTF8Text(
|
||||
const TessChoiceIterator *handle);
|
||||
TESS_API float TessChoiceIteratorConfidence(const TessChoiceIterator *handle);
|
||||
|
||||
/* Progress monitor */
|
||||
|
||||
TESS_API ETEXT_DESC *TessMonitorCreate();
|
||||
TESS_API void TessMonitorDelete(ETEXT_DESC *monitor);
|
||||
TESS_API void TessMonitorSetCancelFunc(ETEXT_DESC *monitor,
|
||||
TessCancelFunc cancelFunc);
|
||||
TESS_API void TessMonitorSetCancelThis(ETEXT_DESC *monitor, void *cancelThis);
|
||||
TESS_API void *TessMonitorGetCancelThis(ETEXT_DESC *monitor);
|
||||
TESS_API void TessMonitorSetProgressFunc(ETEXT_DESC *monitor,
|
||||
TessProgressFunc progressFunc);
|
||||
TESS_API int TessMonitorGetProgress(ETEXT_DESC *monitor);
|
||||
TESS_API void TessMonitorSetDeadlineMSecs(ETEXT_DESC *monitor, int deadline);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // API_CAPI_H_
|
|
@ -1,37 +0,0 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: export.h
|
||||
// Description: Place holder
|
||||
//
|
||||
// (C) Copyright 2006, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef TESSERACT_PLATFORM_H_
|
||||
#define TESSERACT_PLATFORM_H_
|
||||
|
||||
// TESS_API decorates symbols that are part of the library's exported interface.
// A definition already supplied by the build is left untouched. Otherwise:
//   * Windows / Cygwin: dllexport when TESS_EXPORTS is defined, dllimport when
//     TESS_IMPORTS is defined, nothing when neither is defined.
//   * Everywhere else: default symbol visibility when either switch is
//     defined, nothing when neither is defined.
#if !defined(TESS_API)
#  if (defined(_WIN32) || defined(__CYGWIN__)) && defined(TESS_EXPORTS)
#    define TESS_API __declspec(dllexport)
#  elif (defined(_WIN32) || defined(__CYGWIN__)) && defined(TESS_IMPORTS)
#    define TESS_API __declspec(dllimport)
#  elif !(defined(_WIN32) || defined(__CYGWIN__)) && \
      (defined(TESS_EXPORTS) || defined(TESS_IMPORTS))
#    define TESS_API __attribute__((visibility("default")))
#  else
#    define TESS_API
#  endif
#endif
|
||||
|
||||
#endif // TESSERACT_PLATFORM_H_
|
|
@ -1,235 +0,0 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: ltrresultiterator.h
|
||||
// Description: Iterator for tesseract results in strict left-to-right
|
||||
// order that avoids using tesseract internal data structures.
|
||||
// Author: Ray Smith
|
||||
//
|
||||
// (C) Copyright 2010, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H_
|
||||
#define TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H_
|
||||
|
||||
#include "export.h" // for TESS_API
|
||||
#include "pageiterator.h" // for PageIterator
|
||||
#include "publictypes.h" // for PageIteratorLevel
|
||||
#include "unichar.h" // for StrongScriptDirection
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
class BLOB_CHOICE_IT;
|
||||
class PAGE_RES;
|
||||
class WERD_RES;
|
||||
|
||||
class Tesseract;
|
||||
|
||||
// Class to iterate over tesseract results, providing access to all levels
|
||||
// of the page hierarchy, without including any tesseract headers or having
|
||||
// to handle any tesseract structures.
|
||||
// WARNING! This class points to data held within the TessBaseAPI class, and
|
||||
// therefore can only be used while the TessBaseAPI class still exists and
|
||||
// has not been subjected to a call of Init, SetImage, Recognize, Clear, End
|
||||
// DetectOS, or anything else that changes the internal PAGE_RES.
|
||||
// See tesseract/publictypes.h for the definition of PageIteratorLevel.
|
||||
// See also base class PageIterator, which contains the bulk of the interface.
|
||||
// LTRResultIterator adds text-specific methods for access to OCR output.
|
||||
|
||||
class TESS_API LTRResultIterator : public PageIterator {
|
||||
friend class ChoiceIterator;
|
||||
|
||||
public:
|
||||
// page_res and tesseract come directly from the BaseAPI.
|
||||
// The rectangle parameters are copied indirectly from the Thresholder,
|
||||
// via the BaseAPI. They represent the coordinates of some rectangle in an
|
||||
// original image (in top-left-origin coordinates) and therefore the top-left
|
||||
// needs to be added to any output boxes in order to specify coordinates
|
||||
// in the original image. See TessBaseAPI::SetRectangle.
|
||||
// The scale and scaled_yres are in case the Thresholder scaled the image
|
||||
// rectangle prior to thresholding. Any coordinates in tesseract's image
|
||||
// must be divided by scale before adding (rect_left, rect_top).
|
||||
// The scaled_yres indicates the effective resolution of the binary image
|
||||
// that tesseract has been given by the Thresholder.
|
||||
// After the constructor, Begin has already been called.
|
||||
LTRResultIterator(PAGE_RES *page_res, Tesseract *tesseract, int scale,
|
||||
int scaled_yres, int rect_left, int rect_top,
|
||||
int rect_width, int rect_height);
|
||||
|
||||
~LTRResultIterator() override;
|
||||
|
||||
// LTRResultIterators may be copied! This makes it possible to iterate over
|
||||
// all the objects at a lower level, while maintaining an iterator to
|
||||
// objects at a higher level. These constructors DO NOT CALL Begin, so
|
||||
// iterations will continue from the location of src.
|
||||
// TODO: For now the copy constructor and operator= only need the base class
|
||||
// versions, but if new data members are added, don't forget to add them!
|
||||
|
||||
// ============= Moving around within the page ============.
|
||||
|
||||
// See PageIterator.
|
||||
|
||||
// ============= Accessing data ==============.
|
||||
|
||||
// Returns the null terminated UTF-8 encoded text string for the current
|
||||
// object at the given level. Use delete [] to free after use.
|
||||
char *GetUTF8Text(PageIteratorLevel level) const;
|
||||
|
||||
// Set the string inserted at the end of each text line. "\n" by default.
|
||||
void SetLineSeparator(const char *new_line);
|
||||
|
||||
// Set the string inserted at the end of each paragraph. "\n" by default.
|
||||
void SetParagraphSeparator(const char *new_para);
|
||||
|
||||
// Returns the mean confidence of the current object at the given level.
|
||||
// The number should be interpreted as a percent probability. (0.0f-100.0f)
|
||||
float Confidence(PageIteratorLevel level) const;
|
||||
|
||||
// ============= Functions that refer to words only ============.
|
||||
|
||||
// Returns the font attributes of the current word. If iterating at a higher
|
||||
// level object than words, eg textlines, then this will return the
|
||||
// attributes of the first word in that textline.
|
||||
// The actual return value is a string representing a font name. It points
|
||||
// to an internal table and SHOULD NOT BE DELETED. Lifespan is the same as
|
||||
// the iterator itself, ie rendered invalid by various members of
|
||||
// TessBaseAPI, including Init, SetImage, End or deleting the TessBaseAPI.
|
||||
// Pointsize is returned in printers points (1/72 inch.)
|
||||
const char *WordFontAttributes(bool *is_bold, bool *is_italic,
|
||||
bool *is_underlined, bool *is_monospace,
|
||||
bool *is_serif, bool *is_smallcaps,
|
||||
int *pointsize, int *font_id) const;
|
||||
|
||||
// Return the name of the language used to recognize this word.
|
||||
// On error, nullptr. Do not delete this pointer.
|
||||
const char *WordRecognitionLanguage() const;
|
||||
|
||||
// Return the overall directionality of this word.
|
||||
StrongScriptDirection WordDirection() const;
|
||||
|
||||
// Returns true if the current word was found in a dictionary.
|
||||
bool WordIsFromDictionary() const;
|
||||
|
||||
// Returns the number of blanks before the current word.
|
||||
int BlanksBeforeWord() const;
|
||||
|
||||
// Returns true if the current word is numeric.
|
||||
bool WordIsNumeric() const;
|
||||
|
||||
// Returns true if the word contains blamer information.
|
||||
bool HasBlamerInfo() const;
|
||||
|
||||
// Returns the pointer to ParamsTrainingBundle stored in the BlamerBundle
|
||||
// of the current word.
|
||||
const void *GetParamsTrainingBundle() const;
|
||||
|
||||
// Returns a pointer to the string with blamer information for this word.
|
||||
// Assumes that the word's blamer_bundle is not nullptr.
|
||||
const char *GetBlamerDebug() const;
|
||||
|
||||
// Returns a pointer to the string with misadaption information for this word.
|
||||
// Assumes that the word's blamer_bundle is not nullptr.
|
||||
const char *GetBlamerMisadaptionDebug() const;
|
||||
|
||||
// Returns true if a truth string was recorded for the current word.
|
||||
bool HasTruthString() const;
|
||||
|
||||
// Returns true if the given string is equivalent to the truth string for
|
||||
// the current word.
|
||||
bool EquivalentToTruth(const char *str) const;
|
||||
|
||||
// Returns a null terminated UTF-8 encoded truth string for the current word.
|
||||
// Use delete [] to free after use.
|
||||
char *WordTruthUTF8Text() const;
|
||||
|
||||
// Returns a null terminated UTF-8 encoded normalized OCR string for the
|
||||
// current word. Use delete [] to free after use.
|
||||
char *WordNormedUTF8Text() const;
|
||||
|
||||
// Returns a pointer to serialized choice lattice.
|
||||
// Fills lattice_size with the number of bytes in lattice data.
|
||||
const char *WordLattice(int *lattice_size) const;
|
||||
|
||||
// ============= Functions that refer to symbols only ============.
|
||||
|
||||
// Returns true if the current symbol is a superscript.
|
||||
// If iterating at a higher level object than symbols, eg words, then
|
||||
// this will return the attributes of the first symbol in that word.
|
||||
bool SymbolIsSuperscript() const;
|
||||
// Returns true if the current symbol is a subscript.
|
||||
// If iterating at a higher level object than symbols, eg words, then
|
||||
// this will return the attributes of the first symbol in that word.
|
||||
bool SymbolIsSubscript() const;
|
||||
// Returns true if the current symbol is a dropcap.
|
||||
// If iterating at a higher level object than symbols, eg words, then
|
||||
// this will return the attributes of the first symbol in that word.
|
||||
bool SymbolIsDropcap() const;
|
||||
|
||||
protected:
|
||||
const char *line_separator_;
|
||||
const char *paragraph_separator_;
|
||||
};
|
||||
|
||||
// Class to iterate over the classifier choices for a single RIL_SYMBOL.
|
||||
class TESS_API ChoiceIterator {
|
||||
public:
|
||||
// Construction is from a LTRResultIterator that points to the symbol of
|
||||
// interest. The ChoiceIterator allows a one-shot iteration over the
|
||||
// choices for this symbol and after that it is useless.
|
||||
explicit ChoiceIterator(const LTRResultIterator &result_it);
|
||||
~ChoiceIterator();
|
||||
|
||||
// Moves to the next choice for the symbol and returns false if there
|
||||
// are none left.
|
||||
bool Next();
|
||||
|
||||
// ============= Accessing data ==============.
|
||||
|
||||
// Returns the null terminated UTF-8 encoded text string for the current
|
||||
// choice.
|
||||
// NOTE: Unlike LTRResultIterator::GetUTF8Text, the return points to an
|
||||
// internal structure and should NOT be delete[]ed to free after use.
|
||||
const char *GetUTF8Text() const;
|
||||
|
||||
// Returns the confidence of the current choice depending on the used language
|
||||
// data. If only LSTM traineddata is used the value range is 0.0f - 1.0f. All
|
||||
// choices for one symbol should roughly add up to 1.0f.
|
||||
// If only traineddata of the legacy engine is used, the number should be
|
||||
// interpreted as a percent probability. (0.0f-100.0f) In this case
|
||||
// probabilities won't add up to 100. Each one stands on its own.
|
||||
float Confidence() const;
|
||||
|
||||
// Returns a vector containing all timesteps, which belong to the currently
|
||||
// selected symbol. A timestep is a vector containing pairs of symbols and
|
||||
// floating point numbers. The number states the probability for the
|
||||
// corresponding symbol.
|
||||
std::vector<std::vector<std::pair<const char *, float>>> *Timesteps() const;
|
||||
|
||||
private:
|
||||
// clears the remaining spaces out of the results and adapt the probabilities
|
||||
void filterSpaces();
|
||||
// Pointer to the WERD_RES object owned by the API.
|
||||
WERD_RES *word_res_;
|
||||
// Iterator over the blob choices.
|
||||
BLOB_CHOICE_IT *choice_it_;
|
||||
std::vector<std::pair<const char *, float>> *LSTM_choices_ = nullptr;
|
||||
std::vector<std::pair<const char *, float>>::iterator LSTM_choice_it_;
|
||||
|
||||
const int *tstep_index_;
|
||||
// regulates the rating granularity
|
||||
double rating_coefficient_;
|
||||
// leading blanks
|
||||
int blanks_before_word_;
|
||||
// true when there is lstm engine related trained data
|
||||
bool oemLSTM_;
|
||||
};
|
||||
|
||||
} // namespace tesseract.
|
||||
|
||||
#endif // TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H_
|
|
@ -1,158 +0,0 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
/**********************************************************************
|
||||
* File: ocrclass.h
|
||||
* Description: Class definitions and constants for the OCR API.
|
||||
* Author: Hewlett-Packard Co
|
||||
*
|
||||
* (C) Copyright 1996, Hewlett-Packard Co.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
/**********************************************************************
|
||||
* This file contains typedefs for all the structures used by
|
||||
* the HP OCR interface.
|
||||
* The structures are designed to allow them to be used with any
|
||||
* structure alignment up to 8.
|
||||
**********************************************************************/
|
||||
|
||||
#ifndef CCUTIL_OCRCLASS_H_
|
||||
#define CCUTIL_OCRCLASS_H_
|
||||
|
||||
#include <chrono>
|
||||
#include <ctime>
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
/**********************************************************************
|
||||
* EANYCODE_CHAR
|
||||
* Description of a single character. The character code is defined by
|
||||
* the character set of the current font.
|
||||
* Output text is sent as an array of these structures.
|
||||
* Spaces and line endings in the output are represented in the
|
||||
* structures of the surrounding characters. They are not directly
|
||||
* represented as characters.
|
||||
* The first character in a word has a positive value of blanks.
|
||||
* Missing information should be set to the defaults in the comments.
|
||||
* If word bounds are known, but not character bounds, then the top and
|
||||
* bottom of each character should be those of the word. The left of the
|
||||
* first and right of the last char in each word should be set. All other
|
||||
* lefts and rights should be set to -1.
|
||||
* If set, the values of right and bottom are left+width and top+height.
|
||||
* Most of the members come directly from the parameters to ocr_append_char.
|
||||
* The formatting member uses the enhancement parameter and combines the
|
||||
* line direction stuff into the top 3 bits.
|
||||
* The coding is 0=RL char, 1=LR char, 2=DR NL, 3=UL NL, 4=DR Para,
|
||||
* 5=UL Para, 6=TB char, 7=BT char. API users do not need to know what
|
||||
* the coding is, only that it is backwards compatible with the previous
|
||||
* version.
|
||||
**********************************************************************/
|
||||
|
||||
/// A single output character. The value in parentheses after each field is
/// the default to use when the information is missing.
struct EANYCODE_CHAR {
  // From version 2.0 onwards char_code is UTF-8. ASCII characters therefore
  // arrive as one structure, while any other character is delivered as two
  // or more instances of this structure, each carrying a single byte of the
  // UTF-8 code and all sharing the same bounding box. Programs that want to
  // handle languages with different character sets have to treat extended
  // characters appropriately, but *all* code must be prepared to receive
  // UTF-8 coded characters for symbols such as bullets and fancy quotes.
  uint16_t char_code;  ///< the character itself
  int16_t left;        ///< left of the character (-1)
  int16_t right;       ///< right of the character (-1)
  int16_t top;         ///< top of the character (-1)
  int16_t bottom;      ///< bottom of the character (-1)
  int16_t font_index;  ///< which font (0)
  uint8_t confidence;  ///< 0 = perfect, 100 = reject (0/100)
  uint8_t point_size;  ///< size of the character, 72 = 1 inch (10)
  int8_t blanks;       ///< number of spaces before this character (1)
  uint8_t formatting;  ///< character formatting (0)
};
|
||||
|
||||
/**********************************************************************
|
||||
* ETEXT_DESC
|
||||
* Description of the output of the OCR engine.
|
||||
* This structure is used as both a progress monitor and the final
|
||||
* output header, since it needs to be a valid progress monitor while
|
||||
* the OCR engine is storing its output to shared memory.
|
||||
* During progress, all the buffer info is -1.
|
||||
* Progress starts at 0 and increases to 100 during OCR. No other constraint.
|
||||
* Additionally the progress callback contains the bounding box of the word that
|
||||
* is currently being processed.
|
||||
* Every progress callback, the OCR engine must set ocr_alive to 1.
|
||||
* The HP side will set ocr_alive to 0. Repeated failure to reset
|
||||
* to 1 indicates that the OCR engine is dead.
|
||||
* If the cancel function is not null then it is called with the number of
|
||||
* user words found. If it returns true then operation is cancelled.
|
||||
**********************************************************************/
|
||||
class ETEXT_DESC;
|
||||
|
||||
using CANCEL_FUNC = bool (*)(void *, int);
|
||||
using PROGRESS_FUNC = bool (*)(int, int, int, int, int);
|
||||
using PROGRESS_FUNC2 = bool (*)(ETEXT_DESC *, int, int, int, int);
|
||||
|
||||
class ETEXT_DESC { // output header
|
||||
public:
|
||||
int16_t count{0}; /// chars in this buffer(0)
|
||||
int16_t progress{0}; /// percent complete increasing (0-100)
|
||||
/** Progress monitor covers word recognition and it does not cover layout
|
||||
* analysis.
|
||||
* See Ray comment in https://github.com/tesseract-ocr/tesseract/pull/27 */
|
||||
int8_t more_to_come{0}; /// true if not last
|
||||
volatile int8_t ocr_alive{0}; /// ocr sets to 1, HP 0
|
||||
int8_t err_code{0}; /// for errcode use
|
||||
CANCEL_FUNC cancel{nullptr}; /// returns true to cancel
|
||||
PROGRESS_FUNC progress_callback{
|
||||
nullptr}; /// called whenever progress increases
|
||||
PROGRESS_FUNC2 progress_callback2; /// monitor-aware progress callback
|
||||
void *cancel_this{nullptr}; /// this or other data for cancel
|
||||
std::chrono::steady_clock::time_point end_time;
|
||||
/// Time to stop. Expected to be set only
|
||||
/// by call to set_deadline_msecs().
|
||||
EANYCODE_CHAR text[1]{}; /// character data
|
||||
|
||||
ETEXT_DESC() : progress_callback2(&default_progress_func) {
|
||||
end_time = std::chrono::time_point<std::chrono::steady_clock,
|
||||
std::chrono::milliseconds>();
|
||||
}
|
||||
|
||||
// Sets the end time to be deadline_msecs milliseconds from now.
|
||||
void set_deadline_msecs(int32_t deadline_msecs) {
|
||||
if (deadline_msecs > 0) {
|
||||
end_time = std::chrono::steady_clock::now() +
|
||||
std::chrono::milliseconds(deadline_msecs);
|
||||
}
|
||||
}
|
||||
|
||||
// Returns false if we've not passed the end_time, or have not set a deadline.
|
||||
bool deadline_exceeded() const {
|
||||
if (end_time.time_since_epoch() ==
|
||||
std::chrono::steady_clock::duration::zero()) {
|
||||
return false;
|
||||
}
|
||||
auto now = std::chrono::steady_clock::now();
|
||||
return (now > end_time);
|
||||
}
|
||||
|
||||
private:
|
||||
static bool default_progress_func(ETEXT_DESC *ths, int left, int right,
|
||||
int top, int bottom) {
|
||||
if (ths->progress_callback != nullptr) {
|
||||
return (*(ths->progress_callback))(ths->progress, left, right, top,
|
||||
bottom);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace tesseract
|
||||
|
||||
#endif // CCUTIL_OCRCLASS_H_
|
|
@ -1,139 +0,0 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: osdetect.h
|
||||
// Description: Orientation and script detection.
|
||||
// Author: Samuel Charron
|
||||
// Ranjith Unnikrishnan
|
||||
//
|
||||
// (C) Copyright 2008, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef TESSERACT_CCMAIN_OSDETECT_H_
|
||||
#define TESSERACT_CCMAIN_OSDETECT_H_
|
||||
|
||||
#include "export.h" // for TESS_API
|
||||
|
||||
#include <vector> // for std::vector
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
class BLOBNBOX;
|
||||
class BLOBNBOX_CLIST;
|
||||
class BLOB_CHOICE_LIST;
|
||||
class TO_BLOCK_LIST;
|
||||
class UNICHARSET;
|
||||
|
||||
class Tesseract;
|
||||
|
||||
// Upper bound on the number of scripts: the scripts in ICU, plus "NULL",
// plus Japanese and Korean, plus Fraktur.
constexpr int kMaxNumberOfScripts = 116    // ICU
                                    + 1    // "NULL"
                                    + 2    // Japanese and Korean
                                    + 1;   // Fraktur
|
||||
|
||||
/// Orientation id and script id paired with one confidence value each.
/// Every field starts out as zero.
struct OSBestResult {
  // Kept user-provided so the type stays a non-aggregate, as before.
  OSBestResult() {}

  int orientation_id = 0;
  int script_id = 0;
  float sconfidence = 0.0f;
  float oconfidence = 0.0f;
};
|
||||
|
||||
struct OSResults {
|
||||
OSResults() : unicharset(nullptr) {
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
for (int j = 0; j < kMaxNumberOfScripts; ++j) {
|
||||
scripts_na[i][j] = 0;
|
||||
}
|
||||
orientations[i] = 0;
|
||||
}
|
||||
}
|
||||
void update_best_orientation();
|
||||
// Set the estimate of the orientation to the given id.
|
||||
void set_best_orientation(int orientation_id);
|
||||
// Update/Compute the best estimate of the script assuming the given
|
||||
// orientation id.
|
||||
void update_best_script(int orientation_id);
|
||||
// Return the index of the script with the highest score for this orientation.
|
||||
TESS_API int get_best_script(int orientation_id) const;
|
||||
// Accumulate scores with given OSResults instance and update the best script.
|
||||
void accumulate(const OSResults &osr);
|
||||
|
||||
// Print statistics.
|
||||
void print_scores(void) const;
|
||||
void print_scores(int orientation_id) const;
|
||||
|
||||
// Array holding scores for each orientation id [0,3].
|
||||
// Orientation ids [0..3] map to [0, 270, 180, 90] degree orientations of the
|
||||
// page respectively, where the values refer to the amount of clockwise
|
||||
// rotation to be applied to the page for the text to be upright and readable.
|
||||
float orientations[4];
|
||||
// Script confidence scores for each of 4 possible orientations.
|
||||
float scripts_na[4][kMaxNumberOfScripts];
|
||||
|
||||
UNICHARSET *unicharset;
|
||||
OSBestResult best_result;
|
||||
};
|
||||
|
||||
class OrientationDetector {
|
||||
public:
|
||||
OrientationDetector(const std::vector<int> *allowed_scripts,
|
||||
OSResults *results);
|
||||
bool detect_blob(BLOB_CHOICE_LIST *scores);
|
||||
int get_orientation();
|
||||
|
||||
private:
|
||||
OSResults *osr_;
|
||||
const std::vector<int> *allowed_scripts_;
|
||||
};
|
||||
|
||||
class ScriptDetector {
|
||||
public:
|
||||
ScriptDetector(const std::vector<int> *allowed_scripts, OSResults *osr,
|
||||
tesseract::Tesseract *tess);
|
||||
void detect_blob(BLOB_CHOICE_LIST *scores);
|
||||
bool must_stop(int orientation) const;
|
||||
|
||||
private:
|
||||
OSResults *osr_;
|
||||
static const char *korean_script_;
|
||||
static const char *japanese_script_;
|
||||
static const char *fraktur_script_;
|
||||
int korean_id_;
|
||||
int japanese_id_;
|
||||
int katakana_id_;
|
||||
int hiragana_id_;
|
||||
int han_id_;
|
||||
int hangul_id_;
|
||||
int latin_id_;
|
||||
int fraktur_id_;
|
||||
tesseract::Tesseract *tess_;
|
||||
const std::vector<int> *allowed_scripts_;
|
||||
};
|
||||
|
||||
int orientation_and_script_detection(const char *filename, OSResults *,
|
||||
tesseract::Tesseract *);
|
||||
|
||||
int os_detect(TO_BLOCK_LIST *port_blocks, OSResults *osr,
|
||||
tesseract::Tesseract *tess);
|
||||
|
||||
int os_detect_blobs(const std::vector<int> *allowed_scripts,
|
||||
BLOBNBOX_CLIST *blob_list, OSResults *osr,
|
||||
tesseract::Tesseract *tess);
|
||||
|
||||
bool os_detect_blob(BLOBNBOX *bbox, OrientationDetector *o, ScriptDetector *s,
|
||||
OSResults *, tesseract::Tesseract *tess);
|
||||
|
||||
// Helper method to convert an orientation index to its value in degrees.
|
||||
// The value represents the amount of clockwise rotation in degrees that must be
|
||||
// applied for the text to be upright (readable).
|
||||
TESS_API int OrientationIdToValue(const int &id);
|
||||
|
||||
} // namespace tesseract
|
||||
|
||||
#endif // TESSERACT_CCMAIN_OSDETECT_H_
|
|
@ -1,364 +0,0 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: pageiterator.h
|
||||
// Description: Iterator for tesseract page structure that avoids using
|
||||
// tesseract internal data structures.
|
||||
// Author: Ray Smith
|
||||
//
|
||||
// (C) Copyright 2010, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef TESSERACT_CCMAIN_PAGEITERATOR_H_
|
||||
#define TESSERACT_CCMAIN_PAGEITERATOR_H_
|
||||
|
||||
#include "export.h"
|
||||
#include "publictypes.h"
|
||||
|
||||
struct Pix;
|
||||
struct Pta;
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
struct BlamerBundle;
|
||||
class C_BLOB_IT;
|
||||
class PAGE_RES;
|
||||
class PAGE_RES_IT;
|
||||
class WERD;
|
||||
|
||||
class Tesseract;
|
||||
|
||||
/**
|
||||
* Class to iterate over tesseract page structure, providing access to all
|
||||
* levels of the page hierarchy, without including any tesseract headers or
|
||||
* having to handle any tesseract structures.
|
||||
* WARNING! This class points to data held within the TessBaseAPI class, and
|
||||
* therefore can only be used while the TessBaseAPI class still exists and
|
||||
* has not been subjected to a call of Init, SetImage, Recognize, Clear, End,
|
||||
* DetectOS, or anything else that changes the internal PAGE_RES.
|
||||
* See tesseract/publictypes.h for the definition of PageIteratorLevel.
|
||||
* See also ResultIterator, derived from PageIterator, which adds in the
|
||||
* ability to access OCR output with text-specific methods.
|
||||
*/
|
||||
|
||||
class TESS_API PageIterator {
|
||||
public:
|
||||
/**
|
||||
* page_res and tesseract come directly from the BaseAPI.
|
||||
* The rectangle parameters are copied indirectly from the Thresholder,
|
||||
* via the BaseAPI. They represent the coordinates of some rectangle in an
|
||||
* original image (in top-left-origin coordinates) and therefore the top-left
|
||||
* needs to be added to any output boxes in order to specify coordinates
|
||||
* in the original image. See TessBaseAPI::SetRectangle.
|
||||
* The scale and scaled_yres are in case the Thresholder scaled the image
|
||||
* rectangle prior to thresholding. Any coordinates in tesseract's image
|
||||
* must be divided by scale before adding (rect_left, rect_top).
|
||||
* The scaled_yres indicates the effective resolution of the binary image
|
||||
* that tesseract has been given by the Thresholder.
|
||||
* After the constructor, Begin has already been called.
|
||||
*/
|
||||
PageIterator(PAGE_RES *page_res, Tesseract *tesseract, int scale,
|
||||
int scaled_yres, int rect_left, int rect_top, int rect_width,
|
||||
int rect_height);
|
||||
virtual ~PageIterator();
|
||||
|
||||
/**
|
||||
* Page/ResultIterators may be copied! This makes it possible to iterate over
|
||||
* all the objects at a lower level, while maintaining an iterator to
|
||||
* objects at a higher level. These constructors DO NOT CALL Begin, so
|
||||
* iterations will continue from the location of src.
|
||||
*/
|
||||
PageIterator(const PageIterator &src);
|
||||
const PageIterator &operator=(const PageIterator &src);
|
||||
|
||||
/** Are we positioned at the same location as other? */
|
||||
bool PositionedAtSameWord(const PAGE_RES_IT *other) const;
|
||||
|
||||
// ============= Moving around within the page ============.
|
||||
|
||||
/**
|
||||
* Moves the iterator to point to the start of the page to begin an
|
||||
* iteration.
|
||||
*/
|
||||
virtual void Begin();
|
||||
|
||||
/**
|
||||
* Moves the iterator to the beginning of the paragraph.
|
||||
* This class implements this functionality by moving it to the zero indexed
|
||||
* blob of the first (leftmost) word on the first row of the paragraph.
|
||||
*/
|
||||
virtual void RestartParagraph();
|
||||
|
||||
/**
|
||||
* Return whether this iterator points anywhere in the first textline of a
|
||||
* paragraph.
|
||||
*/
|
||||
bool IsWithinFirstTextlineOfParagraph() const;
|
||||
|
||||
/**
|
||||
* Moves the iterator to the beginning of the text line.
|
||||
* This class implements this functionality by moving it to the zero indexed
|
||||
* blob of the first (leftmost) word of the row.
|
||||
*/
|
||||
virtual void RestartRow();
|
||||
|
||||
/**
|
||||
* Moves to the start of the next object at the given level in the
|
||||
* page hierarchy, and returns false if the end of the page was reached.
|
||||
* NOTE that RIL_SYMBOL will skip non-text blocks, but all other
|
||||
* PageIteratorLevel level values will visit each non-text block once.
|
||||
* Think of non text blocks as containing a single para, with a single line,
|
||||
* with a single imaginary word.
|
||||
* Calls to Next with different levels may be freely intermixed.
|
||||
* This function iterates words in right-to-left scripts correctly, if
|
||||
* the appropriate language has been loaded into Tesseract.
|
||||
*/
|
||||
virtual bool Next(PageIteratorLevel level);
|
||||
|
||||
/**
|
||||
* Returns true if the iterator is at the start of an object at the given
|
||||
* level.
|
||||
*
|
||||
* For instance, suppose an iterator it is pointed to the first symbol of the
|
||||
* first word of the third line of the second paragraph of the first block in
|
||||
* a page, then:
|
||||
* it.IsAtBeginningOf(RIL_BLOCK) = false
|
||||
* it.IsAtBeginningOf(RIL_PARA) = false
|
||||
* it.IsAtBeginningOf(RIL_TEXTLINE) = true
|
||||
* it.IsAtBeginningOf(RIL_WORD) = true
|
||||
* it.IsAtBeginningOf(RIL_SYMBOL) = true
|
||||
*/
|
||||
virtual bool IsAtBeginningOf(PageIteratorLevel level) const;
|
||||
|
||||
/**
|
||||
* Returns whether the iterator is positioned at the last element in a
|
||||
* given level. (e.g. the last word in a line, the last line in a block)
|
||||
*
|
||||
* Here's some two-paragraph example
|
||||
* text. It starts off innocuously
|
||||
* enough but quickly turns bizarre.
|
||||
* The author inserts a cornucopia
|
||||
* of words to guard against confused
|
||||
* references.
|
||||
*
|
||||
* Now take an iterator it pointed to the start of "bizarre."
|
||||
* it.IsAtFinalElement(RIL_PARA, RIL_SYMBOL) = false
|
||||
* it.IsAtFinalElement(RIL_PARA, RIL_WORD) = true
|
||||
* it.IsAtFinalElement(RIL_BLOCK, RIL_WORD) = false
|
||||
*/
|
||||
virtual bool IsAtFinalElement(PageIteratorLevel level,
|
||||
PageIteratorLevel element) const;
|
||||
|
||||
/**
|
||||
* Returns whether this iterator is positioned
|
||||
* before other: -1
|
||||
* equal to other: 0
|
||||
* after other: 1
|
||||
*/
|
||||
int Cmp(const PageIterator &other) const;
|
||||
|
||||
// ============= Accessing data ==============.
|
||||
// Coordinate system:
|
||||
// Integer coordinates are at the cracks between the pixels.
|
||||
// The top-left corner of the top-left pixel in the image is at (0,0).
|
||||
// The bottom-right corner of the bottom-right pixel in the image is at
|
||||
// (width, height).
|
||||
// Every bounding box goes from the top-left of the top-left contained
|
||||
// pixel to the bottom-right of the bottom-right contained pixel, so
|
||||
// the bounding box of the single top-left pixel in the image is:
|
||||
// (0,0)->(1,1).
|
||||
// If an image rectangle has been set in the API, then returned coordinates
|
||||
// relate to the original (full) image, rather than the rectangle.
|
||||
|
||||
/**
|
||||
* Controls what to include in a bounding box. Bounding boxes of all levels
|
||||
* between RIL_WORD and RIL_BLOCK can include or exclude potential diacritics.
|
||||
* Between layout analysis and recognition, it isn't known where all
|
||||
* diacritics belong, so this control is used to include or exclude some
|
||||
* diacritics that are above or below the main body of the word. In most cases
|
||||
* where the placement is obvious, and after recognition, it doesn't make as
|
||||
* much difference, as the diacritics will already be included in the word.
|
||||
*/
|
||||
void SetBoundingBoxComponents(bool include_upper_dots,
|
||||
bool include_lower_dots) {
|
||||
include_upper_dots_ = include_upper_dots;
|
||||
include_lower_dots_ = include_lower_dots;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the bounding rectangle of the current object at the given level.
|
||||
* See comment on coordinate system above.
|
||||
* Returns false if there is no such object at the current position.
|
||||
* The returned bounding box is guaranteed to match the size and position
|
||||
* of the image returned by GetBinaryImage, but may clip foreground pixels
|
||||
* from a grey image. The padding argument to GetImage can be used to expand
|
||||
* the image to include more foreground pixels. See GetImage below.
|
||||
*/
|
||||
bool BoundingBox(PageIteratorLevel level, int *left, int *top, int *right,
|
||||
int *bottom) const;
|
||||
bool BoundingBox(PageIteratorLevel level, int padding, int *left, int *top,
|
||||
int *right, int *bottom) const;
|
||||
/**
|
||||
* Returns the bounding rectangle of the object in a coordinate system of the
|
||||
* working image rectangle having its origin at (rect_left_, rect_top_) with
|
||||
* respect to the original image and is scaled by a factor scale_.
|
||||
*/
|
||||
bool BoundingBoxInternal(PageIteratorLevel level, int *left, int *top,
|
||||
int *right, int *bottom) const;
|
||||
|
||||
/** Returns whether there is no object of a given level. */
|
||||
bool Empty(PageIteratorLevel level) const;
|
||||
|
||||
/**
|
||||
* Returns the type of the current block.
|
||||
* See tesseract/publictypes.h for PolyBlockType.
|
||||
*/
|
||||
PolyBlockType BlockType() const;
|
||||
|
||||
/**
|
||||
* Returns the polygon outline of the current block. The returned Pta must
|
||||
* be ptaDestroy-ed after use. Note that the returned Pta lists the vertices
|
||||
* of the polygon, and the last edge is the line segment between the last
|
||||
* point and the first point. nullptr will be returned if the iterator is
|
||||
* at the end of the document or layout analysis was not used.
|
||||
*/
|
||||
Pta *BlockPolygon() const;
|
||||
|
||||
/**
|
||||
* Returns a binary image of the current object at the given level.
|
||||
* The position and size match the return from BoundingBoxInternal, and so
|
||||
* this could be upscaled with respect to the original input image.
|
||||
* Use pixDestroy to delete the image after use.
|
||||
*/
|
||||
Pix *GetBinaryImage(PageIteratorLevel level) const;
|
||||
|
||||
/**
|
||||
* Returns an image of the current object at the given level in greyscale
|
||||
* if available in the input. To guarantee a binary image use BinaryImage.
|
||||
* NOTE that in order to give the best possible image, the bounds are
|
||||
* expanded slightly over the binary connected component, by the supplied
|
||||
* padding, so the top-left position of the returned image is returned
|
||||
* in (left,top). These will most likely not match the coordinates
|
||||
* returned by BoundingBox.
|
||||
* If you do not supply an original image, you will get a binary one.
|
||||
* Use pixDestroy to delete the image after use.
|
||||
*/
|
||||
Pix *GetImage(PageIteratorLevel level, int padding, Pix *original_img,
|
||||
int *left, int *top) const;
|
||||
|
||||
/**
|
||||
* Returns the baseline of the current object at the given level.
|
||||
* The baseline is the line that passes through (x1, y1) and (x2, y2).
|
||||
* WARNING: with vertical text, baselines may be vertical!
|
||||
* Returns false if there is no baseline at the current position.
|
||||
*/
|
||||
bool Baseline(PageIteratorLevel level, int *x1, int *y1, int *x2,
|
||||
int *y2) const;
|
||||
|
||||
// Returns the attributes of the current row.
|
||||
void RowAttributes(float *row_height, float *descenders,
|
||||
float *ascenders) const;
|
||||
|
||||
/**
|
||||
* Returns orientation for the block the iterator points to.
|
||||
* orientation, writing_direction, textline_order: see publictypes.h
|
||||
* deskew_angle: after rotating the block so the text orientation is
|
||||
* upright, how many radians does one have to rotate the
|
||||
* block anti-clockwise for it to be level?
|
||||
* -Pi/4 <= deskew_angle <= Pi/4
|
||||
*/
|
||||
void Orientation(tesseract::Orientation *orientation,
|
||||
tesseract::WritingDirection *writing_direction,
|
||||
tesseract::TextlineOrder *textline_order,
|
||||
float *deskew_angle) const;
|
||||
|
||||
/**
|
||||
* Returns information about the current paragraph, if available.
|
||||
*
|
||||
* justification -
|
||||
* LEFT if ragged right, or fully justified and script is left-to-right.
|
||||
* RIGHT if ragged left, or fully justified and script is right-to-left.
|
||||
* unknown if it looks like source code or we have very few lines.
|
||||
* is_list_item -
|
||||
* true if we believe this is a member of an ordered or unordered list.
|
||||
* is_crown -
|
||||
* true if the first line of the paragraph is aligned with the other
|
||||
* lines of the paragraph even though subsequent paragraphs have first
|
||||
* line indents. This typically indicates that this is the continuation
|
||||
* of a previous paragraph or that it is the very first paragraph in
|
||||
* the chapter.
|
||||
* first_line_indent -
|
||||
* For LEFT aligned paragraphs, the first text line of paragraphs of
|
||||
* this kind are indented this many pixels from the left edge of the
|
||||
* rest of the paragraph.
|
||||
* for RIGHT aligned paragraphs, the first text line of paragraphs of
|
||||
* this kind are indented this many pixels from the right edge of the
|
||||
* rest of the paragraph.
|
||||
* NOTE 1: This value may be negative.
|
||||
* NOTE 2: if *is_crown == true, the first line of this paragraph is
|
||||
* actually flush, and first_line_indent is set to the "common"
|
||||
* first_line_indent for subsequent paragraphs in this block
|
||||
* of text.
|
||||
*/
|
||||
void ParagraphInfo(tesseract::ParagraphJustification *justification,
|
||||
bool *is_list_item, bool *is_crown,
|
||||
int *first_line_indent) const;
|
||||
|
||||
// If the current WERD_RES (it_->word()) is not nullptr, sets the BlamerBundle
|
||||
// of the current word to the given pointer (takes ownership of the pointer)
|
||||
// and returns true.
|
||||
// Can only be used when iterating on the word level.
|
||||
bool SetWordBlamerBundle(BlamerBundle *blamer_bundle);
|
||||
|
||||
protected:
|
||||
/**
|
||||
* Sets up the internal data for iterating the blobs of a new word, then
|
||||
* moves the iterator to the given offset.
|
||||
*/
|
||||
void BeginWord(int offset);
|
||||
|
||||
/** Pointer to the page_res owned by the API. */
|
||||
PAGE_RES *page_res_;
|
||||
/** Pointer to the Tesseract object owned by the API. */
|
||||
Tesseract *tesseract_;
|
||||
/**
|
||||
* The iterator to the page_res_. Owned by this ResultIterator.
|
||||
* A pointer just to avoid dragging in Tesseract includes.
|
||||
*/
|
||||
PAGE_RES_IT *it_;
|
||||
/**
|
||||
* The current input WERD being iterated. If there is an output from OCR,
|
||||
* then word_ is nullptr. Owned by the API
|
||||
*/
|
||||
WERD *word_;
|
||||
/** The length of the current word_. */
|
||||
int word_length_;
|
||||
/** The current blob index within the word. */
|
||||
int blob_index_;
|
||||
/**
|
||||
* Iterator to the blobs within the word. If nullptr, then we are iterating
|
||||
* OCR results in the box_word.
|
||||
* Owned by this ResultIterator.
|
||||
*/
|
||||
C_BLOB_IT *cblob_it_;
|
||||
/** Control over what to include in bounding boxes. */
|
||||
bool include_upper_dots_;
|
||||
bool include_lower_dots_;
|
||||
/** Parameters saved from the Thresholder. Needed to rebuild coordinates.*/
|
||||
int scale_;
|
||||
int scaled_yres_;
|
||||
int rect_left_;
|
||||
int rect_top_;
|
||||
int rect_width_;
|
||||
int rect_height_;
|
||||
};
|
||||
|
||||
} // namespace tesseract.
|
||||
|
||||
#endif // TESSERACT_CCMAIN_PAGEITERATOR_H_
|
|
@ -1,281 +0,0 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: publictypes.h
|
||||
// Description: Types used in both the API and internally
|
||||
// Author: Ray Smith
|
||||
//
|
||||
// (C) Copyright 2010, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef TESSERACT_CCSTRUCT_PUBLICTYPES_H_
|
||||
#define TESSERACT_CCSTRUCT_PUBLICTYPES_H_
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// This file contains types that are used both by the API and internally
|
||||
// to Tesseract. In order to decouple the API from Tesseract and prevent cyclic
|
||||
// dependencies, THIS FILE SHOULD NOT DEPEND ON ANY OTHER PART OF TESSERACT.
|
||||
// Restated: It is OK for low-level Tesseract files to include publictypes.h,
|
||||
// but not for the low-level tesseract code to include top-level API code.
|
||||
// This file should not use other Tesseract types, as that would drag
|
||||
// their includes into the API-level.
|
||||
|
||||
/** Printers' points per inch. Point sizes are returned in this unit. */
constexpr int kPointsPerInch{72};

/**
 * Lowest resolution that is considered believable. It serves as the default
 * when no other information is available, because under-estimating is safer
 * than over-estimating.
 */
constexpr int kMinCredibleResolution{70};

/** Highest resolution that is considered believable. */
constexpr int kMaxCredibleResolution{2400};

/**
 * Ratio between median blob size and likely resolution, used to estimate the
 * resolution when none is provided. This is basically 1/usual text size in
 * inches.
 */
constexpr int kResolutionEstimationFactor{10};
|
||||
|
||||
/**
 * Possible types for a POLY_BLOCK or ColPartition; used extensively by both.
 *
 * Must be kept in sync with kPBColors in polyblk.cpp, with the PTIs*Type
 * functions below, and with kPolyBlockNames in layout_test.cc.
 */
enum PolyBlockType {
  // Type is not yet known. Keep as the first element.
  PT_UNKNOWN,
  // Text that lives inside a column.
  PT_FLOWING_TEXT,
  // Text that spans more than one column.
  PT_HEADING_TEXT,
  // Text that is in a cross-column pull-out region.
  PT_PULLOUT_TEXT,
  // Partition belonging to an equation region.
  PT_EQUATION,
  // Partition has inline equation.
  PT_INLINE_EQUATION,
  // Partition belonging to a table region.
  PT_TABLE,
  // Text-line runs vertically.
  PT_VERTICAL_TEXT,
  // Text that belongs to an image.
  PT_CAPTION_TEXT,
  // Image that lives inside a column.
  PT_FLOWING_IMAGE,
  // Image that spans more than one column.
  PT_HEADING_IMAGE,
  // Image that is in a cross-column pull-out region.
  PT_PULLOUT_IMAGE,
  // Horizontal Line.
  PT_HORZ_LINE,
  // Vertical Line.
  PT_VERT_LINE,
  // Lies outside of any column.
  PT_NOISE,
  // Last enumerator; its value equals the number of types listed above.
  PT_COUNT
};
|
||||
|
||||
/** Returns true if PolyBlockType is of horizontal line type */
|
||||
inline bool PTIsLineType(PolyBlockType type) {
|
||||
return type == PT_HORZ_LINE || type == PT_VERT_LINE;
|
||||
}
|
||||
/** Returns true if PolyBlockType is of image type */
|
||||
inline bool PTIsImageType(PolyBlockType type) {
|
||||
return type == PT_FLOWING_IMAGE || type == PT_HEADING_IMAGE ||
|
||||
type == PT_PULLOUT_IMAGE;
|
||||
}
|
||||
/** Returns true if PolyBlockType is of text type */
|
||||
inline bool PTIsTextType(PolyBlockType type) {
|
||||
return type == PT_FLOWING_TEXT || type == PT_HEADING_TEXT ||
|
||||
type == PT_PULLOUT_TEXT || type == PT_TABLE ||
|
||||
type == PT_VERTICAL_TEXT || type == PT_CAPTION_TEXT ||
|
||||
type == PT_INLINE_EQUATION;
|
||||
}
|
||||
// Returns true if PolyBlockType is of pullout(inter-column) type
|
||||
inline bool PTIsPulloutType(PolyBlockType type) {
|
||||
return type == PT_PULLOUT_IMAGE || type == PT_PULLOUT_TEXT;
|
||||
}
|
||||
|
||||
/**
 * +------------------+  Orientation Example:
 * | 1 Aaaa Aaaa Aaaa |  ====================
 * | Aaa aa aaa aa    |  To left is a diagram of some (1) English and
 * | aaaaaa A aa aaa. |  (2) Chinese text and a (3) photo credit.
 * |                2 |
 * |   #######  c c C |  Upright Latin characters are represented as A and a.
 * |   #######  c c c |  '<' represents a latin character rotated
 * | < #######  c c c |      anti-clockwise 90 degrees.
 * | < #######  c   c |
 * | < #######  .   c |  Upright Chinese characters are represented C and c.
 * | 3 #######      c |
 * +------------------+  NOTA BENE: enum values here should match goodoc.proto
 *
 * If you orient your head so that "up" aligns with Orientation,
 * then the characters will appear "right side up" and readable.
 *
 * In the example above, both the English and Chinese paragraphs are oriented
 * so their "up" is the top of the page (page up). The photo credit is read
 * with one's head turned leftward ("up" is to page left).
 *
 * The values of this enum match the convention of Tesseract's osdetect.h
 */
enum Orientation {
  ORIENTATION_PAGE_UP = 0,
  ORIENTATION_PAGE_RIGHT = 1,
  ORIENTATION_PAGE_DOWN = 2,
  ORIENTATION_PAGE_LEFT = 3
};
|
||||
|
||||
/**
 * Logical layout direction of the grapheme clusters within a line of text,
 * judged when looking at the text line rotated so that its Orientation is
 * "page up".
 *
 * For English text, the writing direction is left-to-right. For the Chinese
 * text in the Orientation example, the writing direction is top-to-bottom.
 */
enum WritingDirection {
  WRITING_DIRECTION_LEFT_TO_RIGHT = 0,
  WRITING_DIRECTION_RIGHT_TO_LEFT = 1,
  WRITING_DIRECTION_TOP_TO_BOTTOM = 2
};
|
||||
|
||||
/**
 * Sequence in which the text lines are read.
 *
 * In English, the order is top-to-bottom.
 * In Chinese, vertical text lines are read right-to-left. Mongolian is
 * written in vertical columns top to bottom like Chinese, but the lines
 * order left-to-right.
 *
 * Note that only some combinations make sense. For example,
 * WRITING_DIRECTION_LEFT_TO_RIGHT implies TEXTLINE_ORDER_TOP_TO_BOTTOM.
 */
enum TextlineOrder {
  TEXTLINE_ORDER_LEFT_TO_RIGHT = 0,
  TEXTLINE_ORDER_RIGHT_TO_LEFT = 1,
  TEXTLINE_ORDER_TOP_TO_BOTTOM = 2
};
|
||||
|
||||
/**
 * Possible modes for page layout analysis. These *must* be kept in order
 * of decreasing amount of layout analysis to be done, except for OSD_ONLY,
 * so that the inline inequality tests on PageSegMode (the PSM_* functions
 * that follow this enum) work.
 */
enum PageSegMode {
  /// Orientation and script detection only.
  PSM_OSD_ONLY = 0,
  /// Automatic page segmentation with orientation and script detection.
  /// (OSD)
  PSM_AUTO_OSD = 1,
  /// Automatic page segmentation, but no OSD, or OCR.
  PSM_AUTO_ONLY = 2,
  /// Fully automatic page segmentation, but no OSD.
  PSM_AUTO = 3,
  /// Assume a single column of text of variable sizes.
  PSM_SINGLE_COLUMN = 4,
  /// Assume a single uniform block of vertically aligned text.
  PSM_SINGLE_BLOCK_VERT_TEXT = 5,
  /// Assume a single uniform block of text. (Default.)
  PSM_SINGLE_BLOCK = 6,
  /// Treat the image as a single text line.
  PSM_SINGLE_LINE = 7,
  /// Treat the image as a single word.
  PSM_SINGLE_WORD = 8,
  /// Treat the image as a single word in a circle.
  PSM_CIRCLE_WORD = 9,
  /// Treat the image as a single character.
  PSM_SINGLE_CHAR = 10,
  /// Find as much text as possible in no particular order.
  PSM_SPARSE_TEXT = 11,
  /// Sparse text with orientation and script det.
  PSM_SPARSE_TEXT_OSD = 12,
  /// Treat the image as a single text line, bypassing hacks that are
  /// Tesseract-specific.
  PSM_RAW_LINE = 13,

  /// Number of enum entries.
  PSM_COUNT
};
|
||||
|
||||
/**
|
||||
* Inline functions that act on a PageSegMode to determine whether components of
|
||||
* layout analysis are enabled.
|
||||
* *Depend critically on the order of elements of PageSegMode.*
|
||||
* NOTE that arg is an int for compatibility with INT_PARAM.
|
||||
*/
|
||||
inline bool PSM_OSD_ENABLED(int pageseg_mode) {
|
||||
return pageseg_mode <= PSM_AUTO_OSD || pageseg_mode == PSM_SPARSE_TEXT_OSD;
|
||||
}
|
||||
inline bool PSM_ORIENTATION_ENABLED(int pageseg_mode) {
|
||||
return pageseg_mode <= PSM_AUTO || pageseg_mode == PSM_SPARSE_TEXT_OSD;
|
||||
}
|
||||
inline bool PSM_COL_FIND_ENABLED(int pageseg_mode) {
|
||||
return pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_AUTO;
|
||||
}
|
||||
inline bool PSM_SPARSE(int pageseg_mode) {
|
||||
return pageseg_mode == PSM_SPARSE_TEXT || pageseg_mode == PSM_SPARSE_TEXT_OSD;
|
||||
}
|
||||
inline bool PSM_BLOCK_FIND_ENABLED(int pageseg_mode) {
|
||||
return pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_SINGLE_COLUMN;
|
||||
}
|
||||
inline bool PSM_LINE_FIND_ENABLED(int pageseg_mode) {
|
||||
return pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_SINGLE_BLOCK;
|
||||
}
|
||||
inline bool PSM_WORD_FIND_ENABLED(int pageseg_mode) {
|
||||
return (pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_SINGLE_LINE) ||
|
||||
pageseg_mode == PSM_SPARSE_TEXT || pageseg_mode == PSM_SPARSE_TEXT_OSD;
|
||||
}
|
||||
|
||||
/**
 * Elements of the page hierarchy. Used in ResultIterator so that a single
 * function can operate on any level, instead of needing 5x as many
 * functions.
 */
enum PageIteratorLevel {
  // Block of text/image/separator line.
  RIL_BLOCK,
  // Paragraph within a block.
  RIL_PARA,
  // Line within a paragraph.
  RIL_TEXTLINE,
  // Word within a textline.
  RIL_WORD,
  // Symbol/character within a word.
  RIL_SYMBOL
};
|
||||
|
||||
/**
 * Paragraph alignment.
 *
 * JUSTIFICATION_UNKNOWN
 *   The alignment is not clearly one of the other options. This could happen
 *   for example if there are only one or two lines of text or the text looks
 *   like source code or poetry.
 *
 * JUSTIFICATION_LEFT
 *   Each line, except possibly the first, is flush to the same left tab stop.
 *
 * JUSTIFICATION_CENTER
 *   The text lines of the paragraph are centered about a line going
 *   down through their middle of the text lines.
 *
 * JUSTIFICATION_RIGHT
 *   Each line, except possibly the first, is flush to the same right tab stop.
 *
 * NOTA BENE: Fully justified paragraphs (text aligned to both left and right
 * margins) are marked by Tesseract with JUSTIFICATION_LEFT if their text
 * is written with a left-to-right script and with JUSTIFICATION_RIGHT if
 * their text is written in a right-to-left script.
 *
 * Interpretation for text read in vertical lines:
 *   "Left" is wherever the starting reading position is.
 */
enum ParagraphJustification {
  JUSTIFICATION_UNKNOWN,
  JUSTIFICATION_LEFT,
  JUSTIFICATION_CENTER,
  JUSTIFICATION_RIGHT
};
|
||||
|
||||
/**
 * When Tesseract/Cube is initialized we can choose to instantiate/load/run
 * only the Tesseract part, only the Cube part or both along with the combiner.
 * The preference of which engine to use is stored in tessedit_ocr_engine_mode.
 *
 * ATTENTION: When modifying this enum, please make sure to make the
 * appropriate changes to all the enums mirroring it (e.g. OCREngine in
 * cityblock/workflow/detection/detection_storage.proto). Such enums will
 * mention the connection to OcrEngineMode in the comments.
 */
enum OcrEngineMode {
  // Run Tesseract only - fastest; deprecated
  OEM_TESSERACT_ONLY,
  // Run just the LSTM line recognizer.
  OEM_LSTM_ONLY,
  // Run the LSTM recognizer, but allow fallback to Tesseract when things get
  // difficult. deprecated
  OEM_TESSERACT_LSTM_COMBINED,
  // Specify this mode when calling init_*(), to indicate that any of the
  // above modes should be automatically inferred from the variables in the
  // language-specific config, command-line configs, or if not specified in
  // any of the above should be set to the default OEM_TESSERACT_ONLY.
  OEM_DEFAULT,
  // Number of OEMs
  OEM_COUNT
};
|
||||
|
||||
} // namespace tesseract.
|
||||
|
||||
#endif // TESSERACT_CCSTRUCT_PUBLICTYPES_H_
|
|
@ -1,311 +0,0 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: renderer.h
|
||||
// Description: Rendering interface to inject into TessBaseAPI
|
||||
//
|
||||
// (C) Copyright 2011, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef TESSERACT_API_RENDERER_H_
|
||||
#define TESSERACT_API_RENDERER_H_
|
||||
|
||||
#include "export.h"
|
||||
|
||||
// To avoid collision with other typenames include the ABSOLUTE MINIMUM
|
||||
// complexity of includes here. Use forward declarations wherever possible
|
||||
// and hide includes of complex types in baseapi.cpp.
|
||||
#include <cstdint>
|
||||
#include <string> // for std::string
|
||||
#include <vector> // for std::vector
|
||||
|
||||
struct Pix;
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
class TessBaseAPI;
|
||||
|
||||
/**
|
||||
* Interface for rendering tesseract results into a document, such as text,
|
||||
* HOCR or pdf. This class is abstract. Specific classes handle individual
|
||||
* formats. This interface is then used to inject the renderer class into
|
||||
* tesseract when processing images.
|
||||
*
|
||||
* For simplicity implementing this with tesseract version 3.01,
|
||||
* the renderer contains document state that is cleared from document
|
||||
* to document just as the TessBaseAPI is. This way the base API can just
|
||||
* delegate its rendering functionality to injected renderers, and the
|
||||
* renderers can manage the associated state needed for the specific formats
|
||||
* in addition to the heuristics for producing it.
|
||||
*/
|
||||
class TESS_API TessResultRenderer {
|
||||
public:
|
||||
virtual ~TessResultRenderer();
|
||||
|
||||
// Takes ownership of pointer so must be new'd instance.
|
||||
// Renderers aren't ordered, but appends the sequences of next parameter
|
||||
// and existing next(). The renderers should be unique across both lists.
|
||||
void insert(TessResultRenderer *next);
|
||||
|
||||
// Returns the next renderer or nullptr.
|
||||
TessResultRenderer *next() {
|
||||
return next_;
|
||||
}
|
||||
|
||||
/**
|
||||
* Starts a new document with the given title.
|
||||
* This clears the contents of the output data.
|
||||
* Title should use UTF-8 encoding.
|
||||
*/
|
||||
bool BeginDocument(const char *title);
|
||||
|
||||
/**
|
||||
* Adds the recognized text from the source image to the current document.
|
||||
* Invalid if BeginDocument not yet called.
|
||||
*
|
||||
* Note that this API is a bit weird but is designed to fit into the
|
||||
* current TessBaseAPI implementation where the api has lots of state
|
||||
* information that we might want to add in.
|
||||
*/
|
||||
bool AddImage(TessBaseAPI *api);
|
||||
|
||||
/**
|
||||
* Finishes the document and finalizes the output data
|
||||
* Invalid if BeginDocument not yet called.
|
||||
*/
|
||||
bool EndDocument();
|
||||
|
||||
const char *file_extension() const {
|
||||
return file_extension_;
|
||||
}
|
||||
const char *title() const {
|
||||
return title_.c_str();
|
||||
}
|
||||
|
||||
// Is everything fine? Otherwise something went wrong.
|
||||
bool happy() const {
|
||||
return happy_;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the index of the last image given to AddImage
|
||||
* (i.e. images are incremented whether the image succeeded or not)
|
||||
*
|
||||
* This is always defined. It means either the number of the
|
||||
* current image, the last image ended, or in the completed document
|
||||
* depending on when in the document lifecycle you are looking at it.
|
||||
* Will return -1 if a document was never started.
|
||||
*/
|
||||
int imagenum() const {
|
||||
return imagenum_;
|
||||
}
|
||||
|
||||
protected:
|
||||
/**
|
||||
* Called by concrete classes.
|
||||
*
|
||||
* outputbase is the name of the output file excluding
|
||||
* extension. For example, "/path/to/chocolate-chip-cookie-recipe"
|
||||
*
|
||||
* extension indicates the file extension to be used for output
|
||||
* files. For example "pdf" will produce a .pdf file, and "hocr"
|
||||
* will produce .hocr files.
|
||||
*/
|
||||
TessResultRenderer(const char *outputbase, const char *extension);
|
||||
|
||||
// Hook for specialized handling in BeginDocument()
|
||||
virtual bool BeginDocumentHandler();
|
||||
|
||||
// This must be overridden to render the OCR'd results
|
||||
virtual bool AddImageHandler(TessBaseAPI *api) = 0;
|
||||
|
||||
// Hook for specialized handling in EndDocument()
|
||||
virtual bool EndDocumentHandler();
|
||||
|
||||
// Renderers can call this to append '\0' terminated strings into
|
||||
// the output string returned by GetOutput.
|
||||
// This method will grow the output buffer if needed.
|
||||
void AppendString(const char *s);
|
||||
|
||||
// Renderers can call this to append binary byte sequences into
|
||||
// the output string returned by GetOutput. Note that s is not necessarily
|
||||
// '\0' terminated (and can contain '\0' within it).
|
||||
// This method will grow the output buffer if needed.
|
||||
void AppendData(const char *s, int len);
|
||||
|
||||
private:
|
||||
TessResultRenderer *next_; // Can link multiple renderers together
|
||||
FILE *fout_; // output file pointer
|
||||
const char *file_extension_; // standard extension for generated output
|
||||
std::string title_; // title of document being rendered
|
||||
int imagenum_; // index of last image added
|
||||
bool happy_; // I get grumpy when the disk fills up, etc.
|
||||
};
|
||||
|
||||
/**
|
||||
* Renders tesseract output into a plain UTF-8 text string
|
||||
*/
|
||||
class TESS_API TessTextRenderer : public TessResultRenderer {
|
||||
public:
|
||||
explicit TessTextRenderer(const char *outputbase);
|
||||
|
||||
protected:
|
||||
bool AddImageHandler(TessBaseAPI *api) override;
|
||||
};
|
||||
|
||||
/**
|
||||
* Renders tesseract output into an hocr text string
|
||||
*
* Overrides the begin-document, per-image and end-document hooks of the
* base renderer.
*/
|
||||
class TESS_API TessHOcrRenderer : public TessResultRenderer {
|
||||
public:
|
||||
// font_info: presumably stored in font_info_ (whether to print font
// information) -- the definition is not visible in this header.
explicit TessHOcrRenderer(const char *outputbase, bool font_info);
|
||||
// Overload without font_info; the value it gives font_info_ is decided in
// the implementation (not visible here).
explicit TessHOcrRenderer(const char *outputbase);
|
||||
|
||||
protected:
|
||||
// Hook overrides; each returns a bool status.
bool BeginDocumentHandler() override;
|
||||
bool AddImageHandler(TessBaseAPI *api) override;
|
||||
bool EndDocumentHandler() override;
|
||||
|
||||
private:
|
||||
bool font_info_; // whether to print font information
|
||||
};
|
||||
|
||||
/**
|
||||
* Renders tesseract output into an alto text string
|
||||
*
* Overrides the begin-document, per-image and end-document hooks of the
* base renderer.
*/
|
||||
class TESS_API TessAltoRenderer : public TessResultRenderer {
|
||||
public:
|
||||
// outputbase is supplied at construction; presumably the output path
// without its extension -- TODO confirm against the implementation.
explicit TessAltoRenderer(const char *outputbase);
|
||||
|
||||
protected:
|
||||
// Hook overrides; each returns a bool status.
bool BeginDocumentHandler() override;
|
||||
bool AddImageHandler(TessBaseAPI *api) override;
|
||||
bool EndDocumentHandler() override;
|
||||
|
||||
private:
|
||||
// NOTE(review): purpose is not documented in this header; presumably
// tracks whether the document header still has to be written -- confirm
// in the implementation. Unlike the other renderer members it has no
// trailing underscore.
bool begin_document;
|
||||
};
|
||||
|
||||
/**
|
||||
* Renders Tesseract output into a TSV string
|
||||
*
* Overrides the begin-document, per-image and end-document hooks of the
* base renderer.
*/
|
||||
class TESS_API TessTsvRenderer : public TessResultRenderer {
|
||||
public:
|
||||
// font_info: presumably stored in font_info_ (whether to print font
// information) -- the definition is not visible in this header.
explicit TessTsvRenderer(const char *outputbase, bool font_info);
|
||||
// Overload without font_info; the value it gives font_info_ is decided in
// the implementation (not visible here).
explicit TessTsvRenderer(const char *outputbase);
|
||||
|
||||
protected:
|
||||
// Hook overrides; each returns a bool status.
bool BeginDocumentHandler() override;
|
||||
bool AddImageHandler(TessBaseAPI *api) override;
|
||||
bool EndDocumentHandler() override;
|
||||
|
||||
private:
|
||||
bool font_info_; // whether to print font information
|
||||
};
|
||||
|
||||
/**
|
||||
* Renders tesseract output into searchable PDF
|
||||
*
* Pages are produced one at a time; the private counters and vectors below
* keep the per-object metadata needed once all pages have been written.
*/
|
||||
class TESS_API TessPDFRenderer : public TessResultRenderer {
|
||||
public:
|
||||
// datadir is the location of the TESSDATA. We need it because
|
||||
// we load a custom PDF font from this location.
|
||||
// textonly defaults to false; presumably stored in textonly_ ("skip images
// if set") -- confirm in the implementation.
TessPDFRenderer(const char *outputbase, const char *datadir,
|
||||
bool textonly = false);
|
||||
|
||||
protected:
|
||||
// Hook overrides; each returns a bool status.
bool BeginDocumentHandler() override;
|
||||
bool AddImageHandler(TessBaseAPI *api) override;
|
||||
bool EndDocumentHandler() override;
|
||||
|
||||
private:
|
||||
// We don't want to have every image in memory at once,
|
||||
// so we store some metadata as we go along producing
|
||||
// PDFs one page at a time. At the end, that metadata is
|
||||
// used to make everything that isn't easily handled in a
|
||||
// streaming fashion.
|
||||
long int obj_; // counter for PDF objects
|
||||
std::vector<uint64_t> offsets_; // offset of every PDF object in bytes
|
||||
std::vector<long int> pages_; // object number for every /Page object
|
||||
std::string datadir_; // where to find the custom font
|
||||
bool textonly_; // skip images if set
|
||||
// Bookkeeping only. DIY = Do It Yourself.
|
||||
// (The caller is expected to emit the object's bytes itself; only the
// object size is passed in.)
void AppendPDFObjectDIY(size_t objectsize);
|
||||
// Bookkeeping + emit data.
|
||||
void AppendPDFObject(const char *data);
|
||||
// Create the /Contents object for an entire page.
|
||||
// NOTE(review): returns a raw char*; who frees it, and how, is not stated
// in this header -- confirm in the implementation.
char *GetPDFTextObjects(TessBaseAPI *api, double width, double height);
|
||||
// Turn an image into a PDF object. Only transcode if we have to.
|
||||
// NOTE(review): pdf_object / pdf_object_size look like out-parameters that
// receive the object bytes and their length; ownership of *pdf_object is
// not documented here -- confirm before use.
static bool imageToPDFObj(Pix *pix, const char *filename, long int objnum,
|
||||
char **pdf_object, long int *pdf_object_size,
|
||||
int jpg_quality);
|
||||
};
|
||||
|
||||
/**
|
||||
* Renders tesseract output into a plain UTF-8 text string
|
||||
*
* NOTE(review): this description is word-for-word the one used for
* TessTextRenderer; judging by the class name this renderer presumably
* emits UNLV-format text instead -- confirm and reword.
*/
|
||||
class TESS_API TessUnlvRenderer : public TessResultRenderer {
|
||||
public:
|
||||
// outputbase is supplied at construction; presumably the output path
// without its extension -- TODO confirm against the implementation.
explicit TessUnlvRenderer(const char *outputbase);
|
||||
|
||||
protected:
|
||||
// Overrides the pure-virtual rendering hook; returns a bool status.
bool AddImageHandler(TessBaseAPI *api) override;
|
||||
};
|
||||
|
||||
/**
|
||||
* Renders tesseract output into a plain UTF-8 text string for LSTMBox
|
||||
*
* Overrides only the pure-virtual AddImageHandler() hook of the base
* renderer.
*/
|
||||
class TESS_API TessLSTMBoxRenderer : public TessResultRenderer {
|
||||
public:
|
||||
// outputbase is supplied at construction; presumably the output path
// without its extension -- TODO confirm against the implementation.
explicit TessLSTMBoxRenderer(const char *outputbase);
|
||||
|
||||
protected:
|
||||
// Overrides the pure-virtual rendering hook; returns a bool status.
bool AddImageHandler(TessBaseAPI *api) override;
|
||||
};
|
||||
|
||||
/**
|
||||
* Renders tesseract output into a plain UTF-8 text string
|
||||
*
* NOTE(review): this description is word-for-word the one used for
* TessTextRenderer; judging by the class name this renderer presumably
* emits box-file text -- confirm and reword.
*/
|
||||
class TESS_API TessBoxTextRenderer : public TessResultRenderer {
|
||||
public:
|
||||
// outputbase is supplied at construction; presumably the output path
// without its extension -- TODO confirm against the implementation.
explicit TessBoxTextRenderer(const char *outputbase);
|
||||
|
||||
protected:
|
||||
// Overrides the pure-virtual rendering hook; returns a bool status.
bool AddImageHandler(TessBaseAPI *api) override;
|
||||
};
|
||||
|
||||
/**
|
||||
* Renders tesseract output into a plain UTF-8 text string in WordStr format
|
||||
*
* Overrides only the pure-virtual AddImageHandler() hook of the base
* renderer.
*/
|
||||
class TESS_API TessWordStrBoxRenderer : public TessResultRenderer {
|
||||
public:
|
||||
// outputbase is supplied at construction; presumably the output path
// without its extension -- TODO confirm against the implementation.
explicit TessWordStrBoxRenderer(const char *outputbase);
|
||||
|
||||
protected:
|
||||
// Overrides the pure-virtual rendering hook; returns a bool status.
bool AddImageHandler(TessBaseAPI *api) override;
|
||||
};
|
||||
|
||||
#ifndef DISABLED_LEGACY_ENGINE
|
||||
|
||||
/**
|
||||
* Renders tesseract output into an osd text string
|
||||
*
* Only declared when DISABLED_LEGACY_ENGINE is not defined (see the
* surrounding #ifndef).
*/
|
||||
class TESS_API TessOsdRenderer : public TessResultRenderer {
|
||||
public:
|
||||
// outputbase is supplied at construction; presumably the output path
// without its extension -- TODO confirm against the implementation.
explicit TessOsdRenderer(const char *outputbase);
|
||||
|
||||
protected:
|
||||
// Overrides the pure-virtual rendering hook; returns a bool status.
bool AddImageHandler(TessBaseAPI *api) override;
|
||||
};
|
||||
|
||||
#endif // ndef DISABLED_LEGACY_ENGINE
|
||||
|
||||
} // namespace tesseract.
|
||||
|
||||
#endif // TESSERACT_API_RENDERER_H_
|
|
@ -1,250 +0,0 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: resultiterator.h
|
||||
// Description: Iterator for tesseract results that is capable of
|
||||
// iterating in proper reading order over Bi Directional
|
||||
// (e.g. mixed Hebrew and English) text.
|
||||
// Author: David Eger
|
||||
//
|
||||
// (C) Copyright 2011, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef TESSERACT_CCMAIN_RESULT_ITERATOR_H_
|
||||
#define TESSERACT_CCMAIN_RESULT_ITERATOR_H_
|
||||
|
||||
#include "export.h" // for TESS_API, TESS_LOCAL
|
||||
#include "ltrresultiterator.h" // for LTRResultIterator
|
||||
#include "publictypes.h" // for PageIteratorLevel
|
||||
#include "unichar.h" // for StrongScriptDirection
|
||||
|
||||
#include <set> // for std::pair
|
||||
#include <vector> // for std::vector
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Iterator over recognition results that extends LTRResultIterator so that
// iteration follows reading order in bidirectional text. Instances are
// obtained through StartOfParagraph(); the constructor is protected.
class TESS_API ResultIterator : public LTRResultIterator {
|
||||
public:
|
||||
// Factory for a ResultIterator built from resit. Per the constructor
// comment further down, the new iterator is reset to the beginning of the
// paragraph rather than staying where resit points.
// NOTE(review): returns a raw pointer; presumably the caller owns it --
// confirm.
static ResultIterator *StartOfParagraph(const LTRResultIterator &resit);
|
||||
|
||||
/**
|
||||
* ResultIterator is copy constructible!
|
||||
* The default copy constructor works just fine for us.
|
||||
*/
|
||||
~ResultIterator() override = default;
|
||||
|
||||
// ============= Moving around within the page ============.
|
||||
/**
|
||||
* Moves the iterator to point to the start of the page to begin
|
||||
* an iteration.
|
||||
*/
|
||||
void Begin() override;
|
||||
|
||||
/**
|
||||
* Moves to the start of the next object at the given level in the
|
||||
* page hierarchy in the appropriate reading order and returns false if
|
||||
* the end of the page was reached.
|
||||
* NOTE that RIL_SYMBOL will skip non-text blocks, but all other
|
||||
* PageIteratorLevel level values will visit each non-text block once.
|
||||
* Think of non text blocks as containing a single para, with a single line,
|
||||
* with a single imaginary word.
|
||||
* Calls to Next with different levels may be freely intermixed.
|
||||
* This function iterates words in right-to-left scripts correctly, if
|
||||
* the appropriate language has been loaded into Tesseract.
|
||||
*/
|
||||
bool Next(PageIteratorLevel level) override;
|
||||
|
||||
/**
|
||||
* IsAtBeginningOf() returns whether we're at the logical beginning of the
|
||||
* given level. (as opposed to ResultIterator's left-to-right top-to-bottom
|
||||
* order). Otherwise, this acts the same as PageIterator::IsAtBeginningOf().
|
||||
* For a full description, see pageiterator.h
|
||||
*/
|
||||
bool IsAtBeginningOf(PageIteratorLevel level) const override;
|
||||
|
||||
/**
|
||||
* Implement PageIterator's IsAtFinalElement correctly in a BiDi context.
|
||||
* For instance, IsAtFinalElement(RIL_PARA, RIL_WORD) returns whether we
|
||||
* point at the last word in a paragraph. See PageIterator for full comment.
|
||||
*/
|
||||
bool IsAtFinalElement(PageIteratorLevel level,
|
||||
PageIteratorLevel element) const override;
|
||||
|
||||
// ============= Functions that refer to words only ============.
|
||||
// Returns the number of blanks before the current word.
|
||||
int BlanksBeforeWord() const;
|
||||
|
||||
// ============= Accessing data ==============.
|
||||
|
||||
/**
|
||||
* Returns the null terminated UTF-8 encoded text string for the current
|
||||
* object at the given level. Use delete [] to free after use.
|
||||
*/
|
||||
virtual char *GetUTF8Text(PageIteratorLevel level) const;
|
||||
|
||||
/**
|
||||
* Returns the LSTM choices for every LSTM timestep for the current word.
|
||||
*
* NOTE(review): both accessors below return a raw pointer to nested
* vectors of (text, score) pairs; ownership and lifetime of the pointee
* are not documented in this header -- confirm before storing or freeing.
*/
|
||||
virtual std::vector<std::vector<std::vector<std::pair<const char *, float>>>>
|
||||
*GetRawLSTMTimesteps() const;
|
||||
virtual std::vector<std::vector<std::pair<const char *, float>>>
|
||||
*GetBestLSTMSymbolChoices() const;
|
||||
|
||||
/**
|
||||
* Return whether the current paragraph's dominant reading direction
|
||||
* is left-to-right (as opposed to right-to-left).
|
||||
*/
|
||||
bool ParagraphIsLtr() const;
|
||||
|
||||
// ============= Exposed only for testing =============.
|
||||
|
||||
/**
|
||||
* Yields the reading order as a sequence of indices and (optional)
|
||||
* meta-marks for a set of words (given left-to-right).
|
||||
* The meta marks are passed as negative values:
|
||||
* kMinorRunStart Start of minor direction text.
|
||||
* kMinorRunEnd End of minor direction text.
|
||||
* kComplexWord The next indexed word contains both left-to-right and
|
||||
* right-to-left characters and was treated as neutral.
|
||||
*
|
||||
* For example, suppose we have five words in a text line,
|
||||
* indexed [0,1,2,3,4] from the leftmost side of the text line.
|
||||
* The following are all believable reading_orders:
|
||||
*
|
||||
* Left-to-Right (in ltr paragraph):
|
||||
* { 0, 1, 2, 3, 4 }
|
||||
* Left-to-Right (in rtl paragraph):
|
||||
* { kMinorRunStart, 0, 1, 2, 3, 4, kMinorRunEnd }
|
||||
* Right-to-Left (in rtl paragraph):
|
||||
* { 4, 3, 2, 1, 0 }
|
||||
* Left-to-Right except for an RTL phrase in words 2, 3 in an ltr paragraph:
|
||||
* { 0, 1, kMinorRunStart, 3, 2, kMinorRunEnd, 4 }
|
||||
*/
|
||||
static void CalculateTextlineOrder(
|
||||
bool paragraph_is_ltr,
|
||||
const std::vector<StrongScriptDirection> &word_dirs,
|
||||
std::vector<int> *reading_order);
|
||||
|
||||
// Meta-mark values used in reading-order vectors (described above as
// negative values); the actual values are defined out of line.
static const int kMinorRunStart;
|
||||
static const int kMinorRunEnd;
|
||||
static const int kComplexWord;
|
||||
|
||||
protected:
|
||||
/**
|
||||
* We presume the data associated with the given iterator will outlive us.
|
||||
* NB: This is non-public (protected) because it does something that is
* non-obvious:
|
||||
* it resets to the beginning of the paragraph instead of staying wherever
|
||||
* resit might have pointed.
|
||||
*/
|
||||
explicit ResultIterator(const LTRResultIterator &resit);
|
||||
|
||||
private:
|
||||
/**
|
||||
* Calculates the current paragraph's dominant writing direction.
|
||||
* Typically, members should use current_paragraph_is_ltr_ instead.
|
||||
*/
|
||||
bool CurrentParagraphIsLtr() const;
|
||||
|
||||
/**
|
||||
* Returns word indices as measured from resit->RestartRow() = index 0
|
||||
* for the reading order of words within a textline given an iterator
|
||||
* into the middle of the text line.
|
||||
* In addition to non-negative word indices, the following negative values
|
||||
* may be inserted:
|
||||
* kMinorRunStart Start of minor direction text.
|
||||
* kMinorRunEnd End of minor direction text.
|
||||
* kComplexWord The previous word contains both left-to-right and
|
||||
* right-to-left characters and was treated as neutral.
|
||||
*
* NOTE(review): the public static overload documents kComplexWord as
* marking the NEXT indexed word, while this comment says the PREVIOUS
* word -- one of the two is presumably wrong; confirm against the
* implementation.
*/
|
||||
void CalculateTextlineOrder(bool paragraph_is_ltr,
|
||||
const LTRResultIterator &resit,
|
||||
std::vector<int> *indices) const;
|
||||
/** Same as above, but the caller's ssd gets filled in if ssd != nullptr. */
|
||||
void CalculateTextlineOrder(bool paragraph_is_ltr,
|
||||
const LTRResultIterator &resit,
|
||||
std::vector<StrongScriptDirection> *ssd,
|
||||
std::vector<int> *indices) const;
|
||||
|
||||
/**
|
||||
* What is the index of the current word in a strict left-to-right reading
|
||||
* of the row?
|
||||
*/
|
||||
int LTRWordIndex() const;
|
||||
|
||||
/**
|
||||
* Given an iterator pointing at a word, returns the logical reading order
|
||||
* of blob indices for the word.
|
||||
*/
|
||||
void CalculateBlobOrder(std::vector<int> *blob_indices) const;
|
||||
|
||||
/** Precondition: current_paragraph_is_ltr_ is set. */
|
||||
void MoveToLogicalStartOfTextline();
|
||||
|
||||
/**
|
||||
* Precondition: current_paragraph_is_ltr_ and in_minor_direction_
|
||||
* are set.
|
||||
*/
|
||||
void MoveToLogicalStartOfWord();
|
||||
|
||||
/** Are we pointing at the final (reading order) symbol of the word? */
|
||||
bool IsAtFinalSymbolOfWord() const;
|
||||
|
||||
/** Are we pointing at the first (reading order) symbol of the word? */
|
||||
bool IsAtFirstSymbolOfWord() const;
|
||||
|
||||
/**
|
||||
* Append any extra marks that should be appended to this word when printed.
|
||||
* Mostly, these are Unicode BiDi control characters.
|
||||
*/
|
||||
void AppendSuffixMarks(std::string *text) const;
|
||||
|
||||
/** Appends the current word in reading order to the given buffer.*/
|
||||
void AppendUTF8WordText(std::string *text) const;
|
||||
|
||||
/**
|
||||
* Appends the text of the current text line, *assuming this iterator is
|
||||
* positioned at the beginning of the text line*. This function
|
||||
* updates the iterator to point to the first position past the text line.
|
||||
* Each textline is terminated in a single newline character.
|
||||
* If the textline ends a paragraph, it gets a second terminal newline.
|
||||
*/
|
||||
void IterateAndAppendUTF8TextlineText(std::string *text);
|
||||
|
||||
/**
|
||||
* Appends the text of the current paragraph in reading order
|
||||
* to the given buffer.
|
||||
* Each textline is terminated in a single newline character, and the
|
||||
* paragraph gets an extra newline at the end.
|
||||
*/
|
||||
void AppendUTF8ParagraphText(std::string *text) const;
|
||||
|
||||
/** Returns whether the bidi_debug flag is set to at least min_level. */
|
||||
bool BidiDebug(int min_level) const;
|
||||
|
||||
/** Cached dominant direction of the current paragraph (true = LTR). */
bool current_paragraph_is_ltr_;
|
||||
|
||||
/**
|
||||
* Is the currently pointed-at character at the beginning of
|
||||
* a minor-direction run?
|
||||
*/
|
||||
bool at_beginning_of_minor_run_;
|
||||
|
||||
/** Is the currently pointed-at character in a minor-direction sequence? */
|
||||
bool in_minor_direction_;
|
||||
|
||||
/**
|
||||
* Should detected inter-word spaces be preserved, or "compressed" to a single
|
||||
* space character (default behavior).
|
||||
*/
|
||||
bool preserve_interword_spaces_;
|
||||
};
|
||||
|
||||
} // namespace tesseract.
|
||||
|
||||
#endif // TESSERACT_CCMAIN_RESULT_ITERATOR_H_
|
|
@ -1,174 +0,0 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: unichar.h
|
||||
// Description: Unicode character/ligature class.
|
||||
// Author: Ray Smith
|
||||
//
|
||||
// (C) Copyright 2006, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef TESSERACT_CCUTIL_UNICHAR_H_
|
||||
#define TESSERACT_CCUTIL_UNICHAR_H_
|
||||
|
||||
#include "export.h"
|
||||
|
||||
#include <memory.h>
|
||||
#include <cstring>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Maximum number of characters that can be stored in a UNICHAR. Must be
|
||||
// at least 4. Must not exceed 31 without changing the coding of length.
|
||||
// UNICHAR keeps its UTF-8 bytes in a char[UNICHAR_LEN] whose last element
// doubles as the stored length (see UNICHAR::chars).
#define UNICHAR_LEN 30
|
||||
|
||||
// A UNICHAR_ID is the unique id of a unichar.
|
||||
using UNICHAR_ID = int;
|
||||
|
||||
// A variable to indicate an invalid or uninitialized unichar id.
|
||||
// (Namespace-scope `static const`: every translation unit that includes
// this header gets its own copy.)
static const int INVALID_UNICHAR_ID = -1;
|
||||
// A special unichar that corresponds to INVALID_UNICHAR_ID.
|
||||
static const char INVALID_UNICHAR[] = "__INVALID_UNICHAR__";
|
||||
|
||||
// Classifies a piece of text by the strongly-directional characters it
// contains. Unscoped enum with explicit values 0..3.
enum StrongScriptDirection {
|
||||
DIR_NEUTRAL = 0, // Text contains only neutral characters.
|
||||
DIR_LEFT_TO_RIGHT = 1, // Text contains no Right-to-Left characters.
|
||||
DIR_RIGHT_TO_LEFT = 2, // Text contains no Left-to-Right characters.
|
||||
DIR_MIX = 3, // Text contains a mixture of left-to-right
|
||||
// and right-to-left characters.
|
||||
};
|
||||
|
||||
// Code-point type used by UNICHAR::UTF8ToUTF32 / UNICHAR::UTF32ToUTF8.
// This is a plain signed int alias, not the built-in char32_t.
using char32 = signed int;
|
||||
|
||||
// The UNICHAR class holds a single classification result. This may be
|
||||
// a single Unicode character (stored as between 1 and 4 utf8 bytes) or
|
||||
// multiple Unicode characters representing the NFKC expansion of a ligature
|
||||
// such as fi, ffl etc. These are also stored as utf8.
|
||||
class TESS_API UNICHAR {
|
||||
public:
|
||||
// Default constructor: zero-fills the whole buffer, which also sets the
// length slot chars[UNICHAR_LEN - 1] to 0, i.e. an empty string.
UNICHAR() {
|
||||
memset(chars, 0, UNICHAR_LEN);
|
||||
}
|
||||
|
||||
// Construct from a utf8 string. If len<0 then the string is null terminated.
|
||||
// If the string is too long to fit in the UNICHAR then it takes only what
|
||||
// will fit.
|
||||
UNICHAR(const char *utf8_str, int len);
|
||||
|
||||
// Construct from a single UCS4 character.
|
||||
explicit UNICHAR(int unicode);
|
||||
|
||||
// Default copy constructor and operator= are OK.
|
||||
|
||||
// Get the first character as UCS-4.
|
||||
int first_uni() const;
|
||||
|
||||
// Get the length of the UTF8 string.
|
||||
// The last slot of chars holds the length when its value is in
// [0, UNICHAR_LEN); any other value means the slot is a genuine character
// and all UNICHAR_LEN bytes are in use.
int utf8_len() const {
|
||||
int len = chars[UNICHAR_LEN - 1];
|
||||
return len >= 0 && len < UNICHAR_LEN ? len : UNICHAR_LEN;
|
||||
}
|
||||
|
||||
// Get a UTF8 string, but NOT null terminated. Use utf8_len() for its
// length.
|
||||
const char *utf8() const {
|
||||
return chars;
|
||||
}
|
||||
|
||||
// Get a terminated UTF8 string: Must delete[] it after use.
|
||||
char *utf8_str() const;
|
||||
|
||||
// Get the number of bytes in the first character of the given utf8 string.
|
||||
static int utf8_step(const char *utf8_str);
|
||||
|
||||
// A class to simplify iterating over and accessing elements of a UTF8
|
||||
// string. Note that unlike the UNICHAR class, const_iterator does NOT COPY or
|
||||
// take ownership of the underlying byte array. It also does not permit
|
||||
// modification of the array (as the name suggests).
|
||||
//
|
||||
// Example:
|
||||
// for (UNICHAR::const_iterator it = UNICHAR::begin(str, str_len);
|
||||
// it != UNICHAR::end(str, str_len);
|
||||
// ++it) {
|
||||
// printf("UCS-4 symbol code = %d\n", *it);
|
||||
// char buf[5];
|
||||
// int char_len = it.get_utf8(buf); buf[char_len] = '\0';
|
||||
// printf("Char = %s\n", buf);
|
||||
// }
|
||||
class TESS_API const_iterator {
|
||||
using CI = const_iterator;
|
||||
|
||||
public:
|
||||
// Step to the next UTF8 character.
|
||||
// If the current position is at an illegal UTF8 character, then print an
|
||||
// error message and step by one byte. If the current position is at a
|
||||
// null (terminator) byte, don't step past it.
|
||||
const_iterator &operator++();
|
||||
|
||||
// Return the UCS-4 value at the current position.
|
||||
// If the current position is at an illegal UTF8 value, return a single
|
||||
// space character.
|
||||
int operator*() const;
|
||||
|
||||
// Store the UTF-8 encoding of the current codepoint into buf, which must be
|
||||
// at least 4 bytes long. Return the number of bytes written.
|
||||
// If the current position is at an illegal UTF8 value, writes a single
|
||||
// space character and returns 1.
|
||||
// Note that this method does not null-terminate the buffer.
|
||||
int get_utf8(char *buf) const;
|
||||
// Returns the number of bytes of the current codepoint. Returns 1 if the
|
||||
// current position is at an illegal UTF8 value.
|
||||
int utf8_len() const;
|
||||
// Returns true if the UTF-8 encoding at the current position is legal.
|
||||
bool is_legal() const;
|
||||
|
||||
// Return the pointer into the string at the current position.
|
||||
const char *utf8_data() const {
|
||||
return it_;
|
||||
}
|
||||
|
||||
// Iterator equality operators.
|
||||
// Two iterators are equal when they point at the same byte address.
friend bool operator==(const CI &lhs, const CI &rhs) {
|
||||
return lhs.it_ == rhs.it_;
|
||||
}
|
||||
friend bool operator!=(const CI &lhs, const CI &rhs) {
|
||||
return !(lhs == rhs);
|
||||
}
|
||||
|
||||
private:
|
||||
// Only UNICHAR can construct iterators (via UNICHAR::begin / UNICHAR::end).
friend class UNICHAR;
|
||||
explicit const_iterator(const char *it) : it_(it) {}
|
||||
|
||||
const char *it_; // Pointer into the string.
|
||||
};
|
||||
|
||||
// Create a start/end iterator pointing to a string. Note that these methods
|
||||
// are static and do NOT create a copy or take ownership of the underlying
|
||||
// array.
|
||||
static const_iterator begin(const char *utf8_str, int byte_length);
|
||||
static const_iterator end(const char *utf8_str, int byte_length);
|
||||
|
||||
// Converts a utf-8 string to a vector of unicodes.
|
||||
// Returns an empty vector if the input contains invalid UTF-8.
|
||||
static std::vector<char32> UTF8ToUTF32(const char *utf8_str);
|
||||
// Converts a vector of unicodes to a utf8 string.
|
||||
// Returns an empty string if the input contains an invalid unicode.
|
||||
static std::string UTF32ToUTF8(const std::vector<char32> &str32);
|
||||
|
||||
private:
|
||||
// A UTF-8 representation of 1 or more Unicode characters.
|
||||
// The last element (chars[UNICHAR_LEN - 1]) is a length if
|
||||
// its value < UNICHAR_LEN, otherwise it is a genuine character.
|
||||
char chars[UNICHAR_LEN]{};
|
||||
};
|
||||
|
||||
} // namespace tesseract
|
||||
|
||||
#endif // TESSERACT_CCUTIL_UNICHAR_H_
|
|
@ -1,34 +0,0 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: version.h
|
||||
// Description: Version information
|
||||
//
|
||||
// (C) Copyright 2018, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef TESSERACT_API_VERSION_H_
|
||||
#define TESSERACT_API_VERSION_H_
|
||||
|
||||
// clang-format off
|
||||
|
||||
// NOTE(review): the @...@ values below are unsubstituted template
// placeholders. '@' is not a valid C++ token, so any use of these macros
// fails to compile until a configure step has replaced them -- presumably
// this file is the build-system template for version.h; confirm.
#define TESSERACT_MAJOR_VERSION @GENERIC_MAJOR_VERSION@
|
||||
#define TESSERACT_MINOR_VERSION @GENERIC_MINOR_VERSION@
|
||||
#define TESSERACT_MICRO_VERSION @GENERIC_MICRO_VERSION@
|
||||
|
||||
// Single packed integer: major shifted left by 16, minor shifted left by 8,
// micro in the low bits, OR-ed together (minor and micro must each stay
// below 256 for the fields not to overlap).
#define TESSERACT_VERSION \
|
||||
(TESSERACT_MAJOR_VERSION << 16 | \
|
||||
TESSERACT_MINOR_VERSION << 8 | \
|
||||
TESSERACT_MICRO_VERSION)
|
||||
|
||||
// Version as a string literal; also a placeholder until substituted.
#define TESSERACT_VERSION_STR "@PACKAGE_VERSION@"
|
||||
|
||||
// clang-format on
|
||||
|
||||
#endif // TESSERACT_API_VERSION_H_
|
|
@ -1,812 +0,0 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: baseapi.h
|
||||
// Description: Simple API for calling tesseract.
|
||||
// Author: Ray Smith
|
||||
//
|
||||
// (C) Copyright 2006, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef TESSERACT_API_BASEAPI_H_
|
||||
#define TESSERACT_API_BASEAPI_H_
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
# include "config_auto.h" // DISABLED_LEGACY_ENGINE
|
||||
#endif
|
||||
|
||||
#include "export.h"
|
||||
#include "pageiterator.h"
|
||||
#include "publictypes.h"
|
||||
#include "resultiterator.h"
|
||||
#include "unichar.h"
|
||||
|
||||
#include "version.h"
|
||||
|
||||
#include <cstdio>
|
||||
#include <vector> // for std::vector
|
||||
|
||||
struct Pix;
|
||||
struct Pixa;
|
||||
struct Boxa;
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
class PAGE_RES;
|
||||
class ParagraphModel;
|
||||
class BLOCK_LIST;
|
||||
class ETEXT_DESC;
|
||||
struct OSResults;
|
||||
class UNICHARSET;
|
||||
|
||||
class Dawg;
|
||||
class Dict;
|
||||
class EquationDetect;
|
||||
class PageIterator;
|
||||
class ImageThresholder;
|
||||
class LTRResultIterator;
|
||||
class ResultIterator;
|
||||
class MutableIterator;
|
||||
class TessResultRenderer;
|
||||
class Tesseract;
|
||||
|
||||
// Function that reads a whole file into a std::vector<char>.
|
||||
// Returns false on failure.
|
||||
using FileReader = bool (*)(const char *filename, std::vector<char> *data);
|
||||
|
||||
// Pointer to a const Dict member function taking
// (void *, const UNICHARSET &, UNICHAR_ID, bool) and returning int.
// NOTE(review): the role of each argument is not visible in this header.
using DictFunc = int (Dict::*)(void *, const UNICHARSET &, UNICHAR_ID,
|
||||
bool) const;
|
||||
// Pointer to a non-const Dict member function taking
// (const char *, const char *, int, const char *, int) and returning double.
// NOTE(review): argument meanings are not visible in this header.
using ProbabilityInContextFunc = double (Dict::*)(const char *, const char *,
|
||||
int, const char *, int);
|
||||
|
||||
/**
|
||||
* Base class for all tesseract APIs.
|
||||
* Specific classes can add ability to work on different inputs or produce
|
||||
* different outputs.
|
||||
* This class is mostly an interface layer on top of the Tesseract instance
|
||||
* class to hide the data types so that users of this class don't have to
|
||||
* include any other Tesseract headers.
|
||||
*/
|
||||
class TESS_API TessBaseAPI {
|
||||
public:
|
||||
TessBaseAPI();
|
||||
virtual ~TessBaseAPI();
|
||||
// Copy constructor and assignment operator are currently unsupported.
|
||||
TessBaseAPI(TessBaseAPI const &) = delete;
|
||||
TessBaseAPI &operator=(TessBaseAPI const &) = delete;
|
||||
|
||||
/**
|
||||
* Returns the version identifier as a static string. Do not delete.
|
||||
*/
|
||||
static const char *Version();
|
||||
|
||||
/**
|
||||
* If compiled with OpenCL AND an available OpenCL
|
||||
* device is deemed faster than serial code, then
|
||||
* "device" is populated with the cl_device_id
|
||||
* and returns sizeof(cl_device_id)
|
||||
* otherwise *device=nullptr and returns 0.
|
||||
*/
|
||||
static size_t getOpenCLDevice(void **device);
|
||||
|
||||
/**
|
||||
* Set the name of the input file. Needed for training and
|
||||
* reading a UNLV zone file, and for searchable PDF output.
|
||||
*/
|
||||
void SetInputName(const char *name);
|
||||
/**
|
||||
* These functions are required for searchable PDF output.
|
||||
* We need our hands on the input file so that we can include
|
||||
* it in the PDF without transcoding. If that is not possible,
|
||||
* we need the original image. Finally, resolution metadata
|
||||
* is stored in the PDF so we need that as well.
|
||||
*/
|
||||
const char *GetInputName();
|
||||
// Takes ownership of the input pix.
|
||||
void SetInputImage(Pix *pix);
|
||||
Pix *GetInputImage();
|
||||
int GetSourceYResolution();
|
||||
const char *GetDatapath();
|
||||
|
||||
/** Set the name of the bonus output files. Needed only for debugging. */
|
||||
void SetOutputName(const char *name);
|
||||
|
||||
/**
|
||||
* Set the value of an internal "parameter."
|
||||
* Supply the name of the parameter and the value as a string, just as
|
||||
* you would in a config file.
|
||||
* Returns false if the name lookup failed.
|
||||
* Eg SetVariable("tessedit_char_blacklist", "xyz"); to ignore x, y and z.
|
||||
* Or SetVariable("classify_bln_numeric_mode", "1"); to set numeric-only mode.
|
||||
* SetVariable may be used before Init, but settings will revert to
|
||||
* defaults on End().
|
||||
*
|
||||
* Note: Must be called after Init(). Only works for non-init variables
|
||||
* (init variables should be passed to Init()).
|
||||
*/
|
||||
bool SetVariable(const char *name, const char *value);
|
||||
bool SetDebugVariable(const char *name, const char *value);
|
||||
|
||||
/**
|
||||
* Returns true if the parameter was found among Tesseract parameters.
|
||||
* Fills in value with the value of the parameter.
|
||||
*/
|
||||
bool GetIntVariable(const char *name, int *value) const;
|
||||
bool GetBoolVariable(const char *name, bool *value) const;
|
||||
bool GetDoubleVariable(const char *name, double *value) const;
|
||||
|
||||
/**
|
||||
* Returns the pointer to the string that represents the value of the
|
||||
* parameter if it was found among Tesseract parameters.
|
||||
*/
|
||||
const char *GetStringVariable(const char *name) const;
|
||||
|
||||
#ifndef DISABLED_LEGACY_ENGINE
|
||||
|
||||
/**
|
||||
* Print Tesseract fonts table to the given file.
|
||||
*/
|
||||
void PrintFontsTable(FILE *fp) const;
|
||||
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Print Tesseract parameters to the given file.
|
||||
*/
|
||||
void PrintVariables(FILE *fp) const;
|
||||
|
||||
/**
|
||||
* Get value of named variable as a string, if it exists.
|
||||
*/
|
||||
bool GetVariableAsString(const char *name, std::string *val) const;
|
||||
|
||||
/**
|
||||
* Instances are now mostly thread-safe and totally independent,
|
||||
* but some global parameters remain. Basically it is safe to use multiple
|
||||
* TessBaseAPIs in different threads in parallel, UNLESS:
|
||||
* you use SetVariable on some of the Params in classify and textord.
|
||||
* If you do, then the effect will be to change it for all your instances.
|
||||
*
|
||||
* Start tesseract. Returns zero on success and -1 on failure.
|
||||
* NOTE that the only members that may be called before Init are those
|
||||
* listed above here in the class definition.
|
||||
*
|
||||
* The datapath must be the name of the tessdata directory.
|
||||
* The language is (usually) an ISO 639-3 string or nullptr will default to
|
||||
* eng. It is entirely safe (and eventually will be efficient too) to call
|
||||
* Init multiple times on the same instance to change language, or just
|
||||
* to reset the classifier.
|
||||
* The language may be a string of the form [~]<lang>[+[~]<lang>]* indicating
|
||||
* that multiple languages are to be loaded. Eg hin+eng will load Hindi and
|
||||
* English. Languages may specify internally that they want to be loaded
|
||||
* with one or more other languages, so the ~ sign is available to override
|
||||
* that. Eg if hin were set to load eng by default, then hin+~eng would force
|
||||
* loading only hin. The number of loaded languages is limited only by
|
||||
* memory, with the caveat that loading additional languages will impact
|
||||
* both speed and accuracy, as there is more work to do to decide on the
|
||||
* applicable language, and there is more chance of hallucinating incorrect
|
||||
* words.
|
||||
* WARNING: On changing languages, all Tesseract parameters are reset
|
||||
* back to their default values. (Which may vary between languages.)
|
||||
* If you have a rare need to set a Variable that controls
|
||||
* initialization for a second call to Init you should explicitly
|
||||
* call End() and then use SetVariable before Init. This is only a very
|
||||
* rare use case, since there are very few uses that require any parameters
|
||||
* to be set before Init.
|
||||
*
|
||||
* If set_only_non_debug_params is true, only params that do not contain
|
||||
* "debug" in the name will be set.
|
||||
*/
|
||||
int Init(const char *datapath, const char *language, OcrEngineMode mode,
|
||||
char **configs, int configs_size,
|
||||
const std::vector<std::string> *vars_vec,
|
||||
const std::vector<std::string> *vars_values,
|
||||
bool set_only_non_debug_params);
|
||||
/**
 * Convenience overload: forwards to the eight-argument Init with no config
 * files (configs = nullptr, configs_size = 0), no variable name/value
 * vectors (vars_vec = vars_values = nullptr) and
 * set_only_non_debug_params = false. Returns whatever that overload returns.
 */
int Init(const char *datapath, const char *language, OcrEngineMode oem) {
|
||||
return Init(datapath, language, oem, nullptr, 0, nullptr, nullptr, false);
|
||||
}
|
||||
/**
 * Convenience overload: same as the eight-argument Init with the engine mode
 * fixed to OEM_DEFAULT, no config files (nullptr, 0), no variable name/value
 * vectors (nullptr, nullptr) and set_only_non_debug_params = false.
 */
int Init(const char *datapath, const char *language) {
|
||||
return Init(datapath, language, OEM_DEFAULT, nullptr, 0, nullptr, nullptr,
|
||||
false);
|
||||
}
|
||||
// In-memory version reads the traineddata file directly from the given
|
||||
// data[data_size] array, and/or reads data via a FileReader.
|
||||
int Init(const char *data, int data_size, const char *language,
|
||||
OcrEngineMode mode, char **configs, int configs_size,
|
||||
const std::vector<std::string> *vars_vec,
|
||||
const std::vector<std::string> *vars_values,
|
||||
bool set_only_non_debug_params, FileReader reader);
|
||||
|
||||
/**
|
||||
* Returns the languages string used in the last valid initialization.
|
||||
* If the last initialization specified "deu+hin" then that will be
|
||||
* returned. If hin loaded eng automatically as well, then that will
|
||||
* not be included in this list. To find the languages actually
|
||||
* loaded use GetLoadedLanguagesAsVector.
|
||||
* The returned string should NOT be deleted.
|
||||
*/
|
||||
const char *GetInitLanguagesAsString() const;
|
||||
|
||||
/**
|
||||
* Returns the loaded languages in the vector of std::string.
|
||||
* Includes all languages loaded by the last Init, including those loaded
|
||||
* as dependencies of other loaded languages.
|
||||
*/
|
||||
void GetLoadedLanguagesAsVector(std::vector<std::string> *langs) const;
|
||||
|
||||
/**
|
||||
* Returns the available languages in the sorted vector of std::string.
|
||||
*/
|
||||
void GetAvailableLanguagesAsVector(std::vector<std::string> *langs) const;
|
||||
|
||||
/**
|
||||
* Init only for page layout analysis. Use only for calls to SetImage and
|
||||
* AnalysePage. Calls that attempt recognition will generate an error.
|
||||
*/
|
||||
void InitForAnalysePage();
|
||||
|
||||
/**
|
||||
* Read a "config" file containing a set of param, value pairs.
|
||||
* Searches the standard places: tessdata/configs, tessdata/tessconfigs
|
||||
* and also accepts a relative or absolute path name.
|
||||
* Note: only non-init params will be set (init params are set by Init()).
|
||||
*/
|
||||
void ReadConfigFile(const char *filename);
|
||||
/** Same as above, but only set debug params from the given config file. */
|
||||
void ReadDebugConfigFile(const char *filename);
|
||||
|
||||
/**
|
||||
* Set the current page segmentation mode. Defaults to PSM_SINGLE_BLOCK.
|
||||
* The mode is stored as an IntParam so it can also be modified by
|
||||
* ReadConfigFile or SetVariable("tessedit_pageseg_mode", mode as string).
|
||||
*/
|
||||
void SetPageSegMode(PageSegMode mode);
|
||||
|
||||
/** Return the current page segmentation mode. */
|
||||
PageSegMode GetPageSegMode() const;
|
||||
|
||||
/**
|
||||
* Recognize a rectangle from an image and return the result as a string.
|
||||
* May be called many times for a single Init.
|
||||
* Currently has no error checking.
|
||||
* Greyscale of 8 and color of 24 or 32 bits per pixel may be given.
|
||||
* Palette color images will not work properly and must be converted to
|
||||
* 24 bit.
|
||||
* Binary images of 1 bit per pixel may also be given but they must be
|
||||
* byte packed with the MSB of the first byte being the first pixel, and a
|
||||
* 1 represents WHITE. For binary images set bytes_per_pixel=0.
|
||||
* The recognized text is returned as a char* which is coded
|
||||
* as UTF8 and must be freed with the delete [] operator.
|
||||
*
|
||||
* Note that TesseractRect is the simplified convenience interface.
|
||||
* For advanced uses, use SetImage, (optionally) SetRectangle, Recognize,
|
||||
* and one or more of the Get*Text functions below.
|
||||
*/
|
||||
char *TesseractRect(const unsigned char *imagedata, int bytes_per_pixel,
|
||||
int bytes_per_line, int left, int top, int width,
|
||||
int height);
|
||||
|
||||
/**
|
||||
* Call between pages or documents etc to free up memory and forget
|
||||
* adaptive data.
|
||||
*/
|
||||
void ClearAdaptiveClassifier();
|
||||
|
||||
/**
|
||||
* @defgroup AdvancedAPI Advanced API
|
||||
* The following methods break TesseractRect into pieces, so you can
|
||||
* get hold of the thresholded image, get the text in different formats,
|
||||
* get bounding boxes, confidences etc.
|
||||
*/
|
||||
/* @{ */
|
||||
|
||||
/**
|
||||
* Provide an image for Tesseract to recognize. Format is as
|
||||
* TesseractRect above. Copies the image buffer and converts to Pix.
|
||||
* SetImage clears all recognition results, and sets the rectangle to the
|
||||
* full image, so it may be followed immediately by a GetUTF8Text, and it
|
||||
* will automatically perform recognition.
|
||||
*/
|
||||
void SetImage(const unsigned char *imagedata, int width, int height,
|
||||
int bytes_per_pixel, int bytes_per_line);
|
||||
|
||||
/**
|
||||
* Provide an image for Tesseract to recognize. As with SetImage above,
|
||||
* Tesseract takes its own copy of the image, so it need not persist until
|
||||
* after Recognize.
|
||||
* Pix vs raw, which to use?
|
||||
* Use Pix where possible. Tesseract uses Pix as its internal representation
|
||||
* and it is therefore more efficient to provide a Pix directly.
|
||||
*/
|
||||
void SetImage(Pix *pix);
|
||||
|
||||
/**
|
||||
* Set the resolution of the source image in pixels per inch so font size
|
||||
* information can be calculated in results. Call this after SetImage().
|
||||
*/
|
||||
void SetSourceResolution(int ppi);
|
||||
|
||||
/**
|
||||
* Restrict recognition to a sub-rectangle of the image. Call after SetImage.
|
||||
* Each SetRectangle clears the recognition results so multiple rectangles
|
||||
* can be recognized with the same image.
|
||||
*/
|
||||
void SetRectangle(int left, int top, int width, int height);
|
||||
|
||||
/**
|
||||
* Get a copy of the internal thresholded image from Tesseract.
|
||||
* Caller takes ownership of the Pix and must pixDestroy it.
|
||||
* May be called any time after SetImage, or after TesseractRect.
|
||||
*/
|
||||
Pix *GetThresholdedImage();
|
||||
|
||||
/**
|
||||
* Get the result of page layout analysis as a leptonica-style
|
||||
* Boxa, Pixa pair, in reading order.
|
||||
* Can be called before or after Recognize.
|
||||
*/
|
||||
Boxa *GetRegions(Pixa **pixa);
|
||||
|
||||
/**
|
||||
* Get the textlines as a leptonica-style
|
||||
* Boxa, Pixa pair, in reading order.
|
||||
* Can be called before or after Recognize.
|
||||
* If raw_image is true, then extract from the original image instead of the
|
||||
* thresholded image and pad by raw_padding pixels.
|
||||
* If blockids is not nullptr, the block-id of each line is also returned as
|
||||
* an array of one element per line. delete [] after use. If paraids is not
|
||||
* nullptr, the paragraph-id of each line within its block is also returned as
|
||||
* an array of one element per line. delete [] after use.
|
||||
*/
|
||||
Boxa *GetTextlines(bool raw_image, int raw_padding, Pixa **pixa,
|
||||
int **blockids, int **paraids);
|
||||
/*
|
||||
Helper method to extract from the thresholded image. (most common usage)
|
||||
*/
|
||||
// Forwards to the five-argument overload with raw_image = false,
// raw_padding = 0 and paraids = nullptr (no paragraph ids requested).
Boxa *GetTextlines(Pixa **pixa, int **blockids) {
|
||||
return GetTextlines(false, 0, pixa, blockids, nullptr);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get textlines and strips of image regions as a leptonica-style Boxa, Pixa
|
||||
* pair, in reading order. Enables downstream handling of non-rectangular
|
||||
* regions.
|
||||
* Can be called before or after Recognize.
|
||||
* If blockids is not nullptr, the block-id of each line is also returned as
|
||||
* an array of one element per line. delete [] after use.
|
||||
*/
|
||||
Boxa *GetStrips(Pixa **pixa, int **blockids);
|
||||
|
||||
/**
|
||||
* Get the words as a leptonica-style
|
||||
* Boxa, Pixa pair, in reading order.
|
||||
* Can be called before or after Recognize.
|
||||
*/
|
||||
Boxa *GetWords(Pixa **pixa);
|
||||
|
||||
/**
|
||||
* Gets the individual connected (text) components (created
|
||||
* after the page segmentation step, but before recognition)
|
||||
* as a leptonica-style Boxa, Pixa pair, in reading order.
|
||||
* Can be called before or after Recognize.
|
||||
* Note: the caller is responsible for calling boxaDestroy()
|
||||
* on the returned Boxa array and pixaDestroy() on cc array.
|
||||
*/
|
||||
Boxa *GetConnectedComponents(Pixa **cc);
|
||||
|
||||
/**
|
||||
* Get the given level kind of components (block, textline, word etc.) as a
|
||||
* leptonica-style Boxa, Pixa pair, in reading order.
|
||||
* Can be called before or after Recognize.
|
||||
* If blockids is not nullptr, the block-id of each component is also returned
|
||||
* as an array of one element per component. delete [] after use.
|
||||
* If paraids is not nullptr, the paragraph-id of each component within its
|
||||
* block is also returned as an array of one element per component. delete []
|
||||
* after use. If raw_image is true, then portions of the original image are
|
||||
* extracted instead of the thresholded image and padded with raw_padding. If
|
||||
* text_only is true, then only text components are returned.
|
||||
*/
|
||||
Boxa *GetComponentImages(PageIteratorLevel level, bool text_only,
|
||||
bool raw_image, int raw_padding, Pixa **pixa,
|
||||
int **blockids, int **paraids);
|
||||
// Helper function to get binary images with no padding (most common usage).
|
||||
// Forwards to the seven-argument overload with raw_image = false,
// raw_padding = 0 and paraids = nullptr (no paragraph ids requested).
Boxa *GetComponentImages(const PageIteratorLevel level, const bool text_only,
|
||||
Pixa **pixa, int **blockids) {
|
||||
return GetComponentImages(level, text_only, false, 0, pixa, blockids,
|
||||
nullptr);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the scale factor of the thresholded image that would be returned by
|
||||
* GetThresholdedImage() and the various GetX() methods that call
|
||||
* GetComponentImages().
|
||||
* Returns 0 if no thresholder has been set.
|
||||
*/
|
||||
int GetThresholdedImageScaleFactor() const;
|
||||
|
||||
/**
|
||||
* Runs page layout analysis in the mode set by SetPageSegMode.
|
||||
* May optionally be called prior to Recognize to get access to just
|
||||
* the page layout results. Returns an iterator to the results.
|
||||
* If merge_similar_words is true, words are combined where suitable for use
|
||||
* with a line recognizer. Use if you want to use AnalyseLayout to find the
|
||||
* textlines, and then want to process textline fragments with an external
|
||||
* line recognizer.
|
||||
* Returns nullptr on error or an empty page.
|
||||
* The returned iterator must be deleted after use.
|
||||
* WARNING! This class points to data held within the TessBaseAPI class, and
|
||||
* therefore can only be used while the TessBaseAPI class still exists and
|
||||
* has not been subjected to a call of Init, SetImage, Recognize, Clear, End,
|
||||
* DetectOS, or anything else that changes the internal PAGE_RES.
|
||||
*/
|
||||
PageIterator *AnalyseLayout();
|
||||
PageIterator *AnalyseLayout(bool merge_similar_words);
|
||||
|
||||
/**
|
||||
* Recognize the image from SetAndThresholdImage, generating Tesseract
|
||||
* internal structures. Returns 0 on success.
|
||||
* Optional. The Get*Text functions below will call Recognize if needed.
|
||||
* After Recognize, the output is kept internally until the next SetImage.
|
||||
*/
|
||||
int Recognize(ETEXT_DESC *monitor);
|
||||
|
||||
/**
|
||||
* Methods to retrieve information after SetAndThresholdImage(),
|
||||
* Recognize() or TesseractRect(). (Recognize is called implicitly if needed.)
|
||||
*/
|
||||
|
||||
/**
|
||||
* Turns images into symbolic text.
|
||||
*
|
||||
* filename can point to a single image, a multi-page TIFF,
|
||||
* or a plain text list of image filenames.
|
||||
*
|
||||
* retry_config is useful for debugging. If not nullptr, you can fall
|
||||
* back to an alternate configuration if a page fails for some
|
||||
* reason.
|
||||
*
|
||||
* timeout_millisec terminates processing if any single page
|
||||
* takes too long. Set to 0 for unlimited time.
|
||||
*
|
||||
* renderer is responsible for creating the output. For example,
|
||||
* use the TessTextRenderer if you want plaintext output, or
|
||||
* the TessPDFRenderer to produce searchable PDF.
|
||||
*
|
||||
* If tessedit_page_number is non-negative, will only process that
|
||||
* single page. Works for multi-page tiff file, or filelist.
|
||||
*
|
||||
* Returns true if successful, false on error.
|
||||
*/
|
||||
bool ProcessPages(const char *filename, const char *retry_config,
|
||||
int timeout_millisec, TessResultRenderer *renderer);
|
||||
// Does the real work of ProcessPages.
|
||||
bool ProcessPagesInternal(const char *filename, const char *retry_config,
|
||||
int timeout_millisec, TessResultRenderer *renderer);
|
||||
|
||||
/**
|
||||
* Turn a single image into symbolic text.
|
||||
*
|
||||
* The pix is the image processed. filename and page_index are
|
||||
* metadata used by side-effect processes, such as reading a box
|
||||
* file or formatting as hOCR.
|
||||
*
|
||||
* See ProcessPages for descriptions of other parameters.
|
||||
*/
|
||||
bool ProcessPage(Pix *pix, int page_index, const char *filename,
|
||||
const char *retry_config, int timeout_millisec,
|
||||
TessResultRenderer *renderer);
|
||||
|
||||
/**
|
||||
* Get a reading-order iterator to the results of LayoutAnalysis and/or
|
||||
* Recognize. The returned iterator must be deleted after use.
|
||||
* WARNING! This class points to data held within the TessBaseAPI class, and
|
||||
* therefore can only be used while the TessBaseAPI class still exists and
|
||||
* has not been subjected to a call of Init, SetImage, Recognize, Clear, End,
|
||||
* DetectOS, or anything else that changes the internal PAGE_RES.
|
||||
*/
|
||||
ResultIterator *GetIterator();
|
||||
|
||||
/**
|
||||
* Get a mutable iterator to the results of LayoutAnalysis and/or Recognize.
|
||||
* The returned iterator must be deleted after use.
|
||||
* WARNING! This class points to data held within the TessBaseAPI class, and
|
||||
* therefore can only be used while the TessBaseAPI class still exists and
|
||||
* has not been subjected to a call of Init, SetImage, Recognize, Clear, End,
|
||||
* DetectOS, or anything else that changes the internal PAGE_RES.
|
||||
*/
|
||||
MutableIterator *GetMutableIterator();
|
||||
|
||||
/**
|
||||
* The recognized text is returned as a char* which is coded
|
||||
* as UTF8 and must be freed with the delete [] operator.
|
||||
*/
|
||||
char *GetUTF8Text();
|
||||
|
||||
/**
|
||||
* Make a HTML-formatted string with hOCR markup from the internal
|
||||
* data structures.
|
||||
* page_number is 0-based but will appear in the output as 1-based.
|
||||
* monitor can be used to
|
||||
* cancel the recognition
|
||||
* receive progress callbacks
|
||||
* Returned string must be freed with the delete [] operator.
|
||||
*/
|
||||
char *GetHOCRText(ETEXT_DESC *monitor, int page_number);
|
||||
|
||||
/**
|
||||
* Make a HTML-formatted string with hOCR markup from the internal
|
||||
* data structures.
|
||||
* page_number is 0-based but will appear in the output as 1-based.
|
||||
* Returned string must be freed with the delete [] operator.
|
||||
*/
|
||||
char *GetHOCRText(int page_number);
|
||||
|
||||
/**
|
||||
* Make an XML-formatted string with Alto markup from the internal
|
||||
* data structures.
|
||||
*/
|
||||
char *GetAltoText(ETEXT_DESC *monitor, int page_number);
|
||||
|
||||
/**
|
||||
* Make an XML-formatted string with Alto markup from the internal
|
||||
* data structures.
|
||||
*/
|
||||
char *GetAltoText(int page_number);
|
||||
|
||||
/**
|
||||
* Make a TSV-formatted string from the internal data structures.
|
||||
* page_number is 0-based but will appear in the output as 1-based.
|
||||
* Returned string must be freed with the delete [] operator.
|
||||
*/
|
||||
char *GetTSVText(int page_number);
|
||||
|
||||
/**
|
||||
* Make a box file for LSTM training from the internal data structures.
|
||||
* Constructs coordinates in the original image - not just the rectangle.
|
||||
* page_number is a 0-based page index that will appear in the box file.
|
||||
* Returned string must be freed with the delete [] operator.
|
||||
*/
|
||||
char *GetLSTMBoxText(int page_number);
|
||||
|
||||
/**
|
||||
* The recognized text is returned as a char* which is coded in the same
|
||||
* format as a box file used in training.
|
||||
* Constructs coordinates in the original image - not just the rectangle.
|
||||
* page_number is a 0-based page index that will appear in the box file.
|
||||
* Returned string must be freed with the delete [] operator.
|
||||
*/
|
||||
char *GetBoxText(int page_number);
|
||||
|
||||
/**
|
||||
* The recognized text is returned as a char* which is coded in the same
|
||||
* format as a WordStr box file used in training.
|
||||
* page_number is a 0-based page index that will appear in the box file.
|
||||
* Returned string must be freed with the delete [] operator.
|
||||
*/
|
||||
char *GetWordStrBoxText(int page_number);
|
||||
|
||||
/**
|
||||
* The recognized text is returned as a char* which is coded
|
||||
* as UNLV format Latin-1 with specific reject and suspect codes.
|
||||
* Returned string must be freed with the delete [] operator.
|
||||
*/
|
||||
char *GetUNLVText();
|
||||
|
||||
/**
|
||||
* Detect the orientation of the input image and apparent script (alphabet).
|
||||
* orient_deg is the detected clockwise rotation of the input image in degrees
|
||||
* (0, 90, 180, 270)
|
||||
* orient_conf is the confidence (15.0 is reasonably confident)
|
||||
* script_name is an ASCII string, the name of the script, e.g. "Latin"
|
||||
* script_conf is confidence level in the script
|
||||
* Returns true on success and writes values to each parameter as an output
|
||||
*/
|
||||
bool DetectOrientationScript(int *orient_deg, float *orient_conf,
|
||||
const char **script_name, float *script_conf);
|
||||
|
||||
/**
|
||||
* The recognized text is returned as a char* which is coded
|
||||
* as UTF8 and must be freed with the delete [] operator.
|
||||
* page_number is a 0-based page index that will appear in the osd file.
|
||||
*/
|
||||
char *GetOsdText(int page_number);
|
||||
|
||||
/** Returns the (average) confidence value between 0 and 100. */
|
||||
int MeanTextConf();
|
||||
/**
|
||||
* Returns all word confidences (between 0 and 100) in an array, terminated
|
||||
* by -1. The calling function must delete [] after use.
|
||||
* The number of confidences should correspond to the number of space-
|
||||
* delimited words in GetUTF8Text.
|
||||
*/
|
||||
int *AllWordConfidences();
|
||||
|
||||
#ifndef DISABLED_LEGACY_ENGINE
|
||||
/**
|
||||
* Applies the given word to the adaptive classifier if possible.
|
||||
* The word must be SPACE-DELIMITED UTF-8 - l i k e t h i s , so it can
|
||||
* tell the boundaries of the graphemes.
|
||||
* Assumes that SetImage/SetRectangle have been used to set the image
|
||||
* to the given word. The mode arg should be PSM_SINGLE_WORD or
|
||||
* PSM_CIRCLE_WORD, as that will be used to control layout analysis.
|
||||
* The currently set PageSegMode is preserved.
|
||||
* Returns false if adaption was not possible for some reason.
|
||||
*/
|
||||
bool AdaptToWordStr(PageSegMode mode, const char *wordstr);
|
||||
#endif // ndef DISABLED_LEGACY_ENGINE
|
||||
|
||||
/**
|
||||
* Free up recognition results and any stored image data, without actually
|
||||
* freeing any recognition data that would be time-consuming to reload.
|
||||
* Afterwards, you must call SetImage or TesseractRect before doing
|
||||
* any Recognize or Get* operation.
|
||||
*/
|
||||
void Clear();
|
||||
|
||||
/**
|
||||
* Close down tesseract and free up all memory. End() is equivalent to
|
||||
* destructing and reconstructing your TessBaseAPI.
|
||||
* Once End() has been used, none of the other API functions may be used
|
||||
* other than Init and anything declared above it in the class definition.
|
||||
*/
|
||||
void End();
|
||||
|
||||
/**
|
||||
* Clear any library-level memory caches.
|
||||
* There are a variety of expensive-to-load constant data structures (mostly
|
||||
* language dictionaries) that are cached globally -- surviving the Init()
|
||||
* and End() of individual TessBaseAPI's. This function allows the clearing
|
||||
* of these caches.
|
||||
**/
|
||||
static void ClearPersistentCache();
|
||||
|
||||
/**
|
||||
* Check whether a word is valid according to Tesseract's language model
|
||||
* @return 0 if the word is invalid, non-zero if valid.
|
||||
* @warning temporary! This function will be removed from here and placed
|
||||
* in a separate API at some future time.
|
||||
*/
|
||||
int IsValidWord(const char *word) const;
|
||||
// Returns true if utf8_character is defined in the UniCharset.
|
||||
bool IsValidCharacter(const char *utf8_character) const;
|
||||
|
||||
bool GetTextDirection(int *out_offset, float *out_slope);
|
||||
|
||||
/** Sets Dict::letter_is_okay_ function to point to the given function. */
|
||||
void SetDictFunc(DictFunc f);
|
||||
|
||||
/** Sets Dict::probability_in_context_ function to point to the given
|
||||
* function.
|
||||
*/
|
||||
void SetProbabilityInContextFunc(ProbabilityInContextFunc f);
|
||||
|
||||
/**
|
||||
* Estimates the Orientation And Script of the image.
|
||||
* @return true if the image was processed successfully.
|
||||
*/
|
||||
bool DetectOS(OSResults *);
|
||||
|
||||
/**
|
||||
* Return text orientation of each block as determined by an earlier run
|
||||
* of layout analysis.
|
||||
*/
|
||||
void GetBlockTextOrientations(int **block_orientation,
|
||||
bool **vertical_writing);
|
||||
|
||||
/** This method returns the string form of the specified unichar. */
|
||||
const char *GetUnichar(int unichar_id) const;
|
||||
|
||||
/** Return the pointer to the i-th dawg loaded into tesseract_ object. */
|
||||
const Dawg *GetDawg(int i) const;
|
||||
|
||||
/** Return the number of dawgs loaded into tesseract_ object. */
|
||||
int NumDawgs() const;
|
||||
|
||||
/** Return the tesseract_ member, i.e. the underlying Tesseract data object. */
Tesseract *tesseract() const {
|
||||
return tesseract_;
|
||||
}
|
||||
|
||||
/** Return the OCR engine mode stored in last_oem_requested_. */
OcrEngineMode oem() const {
|
||||
return last_oem_requested_;
|
||||
}
|
||||
|
||||
void set_min_orientation_margin(double margin);
|
||||
/* @} */
|
||||
|
||||
protected:
|
||||
/** Common code for setting the image. Returns true if Init has been called.
|
||||
*/
|
||||
bool InternalSetImage();
|
||||
|
||||
/**
|
||||
* Run the thresholder to make the thresholded image. If pix is not nullptr,
|
||||
* the source is thresholded to pix instead of the internal IMAGE.
|
||||
*/
|
||||
virtual bool Threshold(Pix **pix);
|
||||
|
||||
/**
|
||||
* Find lines from the image making the BLOCK_LIST.
|
||||
* @return 0 on success.
|
||||
*/
|
||||
int FindLines();
|
||||
|
||||
/** Delete the pageres and block list ready for a new page. */
|
||||
void ClearResults();
|
||||
|
||||
/**
|
||||
* Return an LTR Result Iterator -- used only for training, as we really want
|
||||
* to ignore all BiDi smarts at that point.
|
||||
* delete once you're done with it.
|
||||
*/
|
||||
LTRResultIterator *GetLTRIterator();
|
||||
|
||||
/**
|
||||
* Return the length of the output text string, as UTF8, assuming
|
||||
* one newline per line and one per block, with a terminator,
|
||||
* and assuming a single character reject marker for each rejected character.
|
||||
* Also return the number of recognized blobs in blob_count.
|
||||
*/
|
||||
int TextLength(int *blob_count) const;
|
||||
|
||||
//// paragraphs.cpp ////////////////////////////////////////////////////
|
||||
void DetectParagraphs(bool after_text_recognition);
|
||||
|
||||
/** Return a read-only pointer to the page-level data held in page_res_. */
const PAGE_RES *GetPageRes() const {
|
||||
return page_res_;
|
||||
}
|
||||
|
||||
protected:
|
||||
Tesseract *tesseract_; ///< The underlying data object.
|
||||
Tesseract *osd_tesseract_; ///< For orientation & script detection.
|
||||
EquationDetect *equ_detect_; ///< The equation detector.
|
||||
FileReader reader_; ///< Reads files from any filesystem.
|
||||
ImageThresholder *thresholder_; ///< Image thresholding module.
|
||||
std::vector<ParagraphModel *> *paragraph_models_;
|
||||
BLOCK_LIST *block_list_; ///< The page layout.
|
||||
PAGE_RES *page_res_; ///< The page-level data.
|
||||
std::string input_file_; ///< Name used by training code.
|
||||
std::string output_file_; ///< Name used by debug code.
|
||||
std::string datapath_; ///< Current location of tessdata.
|
||||
std::string language_; ///< Last initialized language.
|
||||
OcrEngineMode last_oem_requested_; ///< Last ocr language mode requested.
|
||||
bool recognition_done_; ///< page_res_ contains recognition data.
|
||||
|
||||
/**
|
||||
* @defgroup ThresholderParams Thresholder Parameters
|
||||
* Parameters saved from the Thresholder. Needed to rebuild coordinates.
|
||||
*/
|
||||
/* @{ */
|
||||
int rect_left_;
|
||||
int rect_top_;
|
||||
int rect_width_;
|
||||
int rect_height_;
|
||||
int image_width_;
|
||||
int image_height_;
|
||||
/* @} */
|
||||
|
||||
private:
|
||||
// A list of image filenames gets special consideration
|
||||
bool ProcessPagesFileList(FILE *fp, std::string *buf,
|
||||
const char *retry_config, int timeout_millisec,
|
||||
TessResultRenderer *renderer,
|
||||
int tessedit_page_number);
|
||||
// TIFF supports multipage so gets special consideration.
|
||||
bool ProcessPagesMultipageTiff(const unsigned char *data, size_t size,
|
||||
const char *filename, const char *retry_config,
|
||||
int timeout_millisec,
|
||||
TessResultRenderer *renderer,
|
||||
int tessedit_page_number);
|
||||
}; // class TessBaseAPI.
|
||||
|
||||
/** Escape a char string - replace &<>"' with HTML codes. */
|
||||
std::string HOcrEscape(const char *text);
|
||||
|
||||
} // namespace tesseract
|
||||
|
||||
#endif // TESSERACT_API_BASEAPI_H_
|
|
@ -1,484 +0,0 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: capi.h
|
||||
// Description: C-API TessBaseAPI
|
||||
//
|
||||
// (C) Copyright 2012, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef API_CAPI_H_
|
||||
#define API_CAPI_H_
|
||||
|
||||
#include "export.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
# include <tesseract/baseapi.h>
|
||||
# include <tesseract/ocrclass.h>
|
||||
# include <tesseract/pageiterator.h>
|
||||
# include <tesseract/renderer.h>
|
||||
# include <tesseract/resultiterator.h>
|
||||
#endif
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#ifndef BOOL
|
||||
# define BOOL int
|
||||
# define TRUE 1
|
||||
# define FALSE 0
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
typedef tesseract::TessResultRenderer TessResultRenderer;
|
||||
typedef tesseract::TessBaseAPI TessBaseAPI;
|
||||
typedef tesseract::PageIterator TessPageIterator;
|
||||
typedef tesseract::ResultIterator TessResultIterator;
|
||||
typedef tesseract::MutableIterator TessMutableIterator;
|
||||
typedef tesseract::ChoiceIterator TessChoiceIterator;
|
||||
typedef tesseract::OcrEngineMode TessOcrEngineMode;
|
||||
typedef tesseract::PageSegMode TessPageSegMode;
|
||||
typedef tesseract::PageIteratorLevel TessPageIteratorLevel;
|
||||
typedef tesseract::Orientation TessOrientation;
|
||||
typedef tesseract::ParagraphJustification TessParagraphJustification;
|
||||
typedef tesseract::WritingDirection TessWritingDirection;
|
||||
typedef tesseract::TextlineOrder TessTextlineOrder;
|
||||
typedef tesseract::PolyBlockType TessPolyBlockType;
|
||||
typedef tesseract::ETEXT_DESC ETEXT_DESC;
|
||||
#else
|
||||
typedef struct TessResultRenderer TessResultRenderer;
|
||||
typedef struct TessBaseAPI TessBaseAPI;
|
||||
typedef struct TessPageIterator TessPageIterator;
|
||||
typedef struct TessResultIterator TessResultIterator;
|
||||
typedef struct TessMutableIterator TessMutableIterator;
|
||||
typedef struct TessChoiceIterator TessChoiceIterator;
|
||||
/* C-only declaration of TessOcrEngineMode; when compiled as C++ this name is
 * a typedef of tesseract::OcrEngineMode instead.
 * NOTE(review): enumerator order presumably has to match the C++ enum -
 * confirm against tesseract::OcrEngineMode before changing it. */
typedef enum TessOcrEngineMode {
|
||||
OEM_TESSERACT_ONLY,
|
||||
OEM_LSTM_ONLY,
|
||||
OEM_TESSERACT_LSTM_COMBINED,
|
||||
OEM_DEFAULT
|
||||
} TessOcrEngineMode;
|
||||
/* C-only declaration of TessPageSegMode; when compiled as C++ this name is
 * a typedef of tesseract::PageSegMode instead.
 * NOTE(review): enumerator order presumably has to match the C++ enum -
 * confirm against tesseract::PageSegMode before changing it. */
typedef enum TessPageSegMode {
|
||||
PSM_OSD_ONLY,
|
||||
PSM_AUTO_OSD,
|
||||
PSM_AUTO_ONLY,
|
||||
PSM_AUTO,
|
||||
PSM_SINGLE_COLUMN,
|
||||
PSM_SINGLE_BLOCK_VERT_TEXT,
|
||||
PSM_SINGLE_BLOCK,
|
||||
PSM_SINGLE_LINE,
|
||||
PSM_SINGLE_WORD,
|
||||
PSM_CIRCLE_WORD,
|
||||
PSM_SINGLE_CHAR,
|
||||
PSM_SPARSE_TEXT,
|
||||
PSM_SPARSE_TEXT_OSD,
|
||||
PSM_RAW_LINE,
|
||||
PSM_COUNT
|
||||
} TessPageSegMode;
|
||||
/* C-only declaration of TessPageIteratorLevel; when compiled as C++ this
 * name is a typedef of tesseract::PageIteratorLevel instead.
 * NOTE(review): enumerator order presumably has to match the C++ enum -
 * confirm against tesseract::PageIteratorLevel before changing it. */
typedef enum TessPageIteratorLevel {
|
||||
RIL_BLOCK,
|
||||
RIL_PARA,
|
||||
RIL_TEXTLINE,
|
||||
RIL_WORD,
|
||||
RIL_SYMBOL
|
||||
} TessPageIteratorLevel;
|
||||
/* C-only declaration of TessPolyBlockType; when compiled as C++ this name is
 * a typedef of tesseract::PolyBlockType instead.
 * NOTE(review): enumerator order presumably has to match the C++ enum -
 * confirm against tesseract::PolyBlockType before changing it. */
typedef enum TessPolyBlockType {
|
||||
PT_UNKNOWN,
|
||||
PT_FLOWING_TEXT,
|
||||
PT_HEADING_TEXT,
|
||||
PT_PULLOUT_TEXT,
|
||||
PT_EQUATION,
|
||||
PT_INLINE_EQUATION,
|
||||
PT_TABLE,
|
||||
PT_VERTICAL_TEXT,
|
||||
PT_CAPTION_TEXT,
|
||||
PT_FLOWING_IMAGE,
|
||||
PT_HEADING_IMAGE,
|
||||
PT_PULLOUT_IMAGE,
|
||||
PT_HORZ_LINE,
|
||||
PT_VERT_LINE,
|
||||
PT_NOISE,
|
||||
PT_COUNT
|
||||
} TessPolyBlockType;
|
||||
/* C-only declaration of TessOrientation; when compiled as C++ this name is
 * a typedef of tesseract::Orientation instead.
 * NOTE(review): enumerator order presumably has to match the C++ enum -
 * confirm against tesseract::Orientation before changing it. */
typedef enum TessOrientation {
|
||||
ORIENTATION_PAGE_UP,
|
||||
ORIENTATION_PAGE_RIGHT,
|
||||
ORIENTATION_PAGE_DOWN,
|
||||
ORIENTATION_PAGE_LEFT
|
||||
} TessOrientation;
|
||||
/* C-only declaration of TessParagraphJustification; when compiled as C++
 * this name is a typedef of tesseract::ParagraphJustification instead.
 * NOTE(review): enumerator order presumably has to match the C++ enum -
 * confirm against tesseract::ParagraphJustification before changing it. */
typedef enum TessParagraphJustification {
|
||||
JUSTIFICATION_UNKNOWN,
|
||||
JUSTIFICATION_LEFT,
|
||||
JUSTIFICATION_CENTER,
|
||||
JUSTIFICATION_RIGHT
|
||||
} TessParagraphJustification;
|
||||
/* C-only declaration of TessWritingDirection; when compiled as C++ this name
 * is a typedef of tesseract::WritingDirection instead.
 * NOTE(review): enumerator order presumably has to match the C++ enum -
 * confirm against tesseract::WritingDirection before changing it. */
typedef enum TessWritingDirection {
|
||||
WRITING_DIRECTION_LEFT_TO_RIGHT,
|
||||
WRITING_DIRECTION_RIGHT_TO_LEFT,
|
||||
WRITING_DIRECTION_TOP_TO_BOTTOM
|
||||
} TessWritingDirection;
|
||||
/* C-only declaration of TessTextlineOrder; when compiled as C++ this name is
 * a typedef of tesseract::TextlineOrder instead.
 * NOTE(review): enumerator order presumably has to match the C++ enum -
 * confirm against tesseract::TextlineOrder before changing it. */
typedef enum TessTextlineOrder {
|
||||
TEXTLINE_ORDER_LEFT_TO_RIGHT,
|
||||
TEXTLINE_ORDER_RIGHT_TO_LEFT,
|
||||
TEXTLINE_ORDER_TOP_TO_BOTTOM
|
||||
} TessTextlineOrder;
|
||||
typedef struct ETEXT_DESC ETEXT_DESC;
|
||||
#endif
|
||||
|
||||
typedef bool (*TessCancelFunc)(void *cancel_this, int words);
|
||||
typedef bool (*TessProgressFunc)(ETEXT_DESC *ths, int left, int right, int top,
|
||||
int bottom);
|
||||
|
||||
struct Pix;
|
||||
struct Boxa;
|
||||
struct Pixa;
|
||||
|
||||
/* General free functions */
|
||||
|
||||
TESS_API const char *TessVersion();
|
||||
TESS_API void TessDeleteText(const char *text);
|
||||
TESS_API void TessDeleteTextArray(char **arr);
|
||||
TESS_API void TessDeleteIntArray(const int *arr);
|
||||
|
||||
/* Renderer API */
|
||||
TESS_API TessResultRenderer *TessTextRendererCreate(const char *outputbase);
|
||||
TESS_API TessResultRenderer *TessHOcrRendererCreate(const char *outputbase);
|
||||
TESS_API TessResultRenderer *TessHOcrRendererCreate2(const char *outputbase,
|
||||
BOOL font_info);
|
||||
TESS_API TessResultRenderer *TessAltoRendererCreate(const char *outputbase);
|
||||
TESS_API TessResultRenderer *TessTsvRendererCreate(const char *outputbase);
|
||||
TESS_API TessResultRenderer *TessPDFRendererCreate(const char *outputbase,
|
||||
const char *datadir,
|
||||
BOOL textonly);
|
||||
TESS_API TessResultRenderer *TessUnlvRendererCreate(const char *outputbase);
|
||||
TESS_API TessResultRenderer *TessBoxTextRendererCreate(const char *outputbase);
|
||||
TESS_API TessResultRenderer *TessLSTMBoxRendererCreate(const char *outputbase);
|
||||
TESS_API TessResultRenderer *TessWordStrBoxRendererCreate(
|
||||
const char *outputbase);
|
||||
|
||||
TESS_API void TessDeleteResultRenderer(TessResultRenderer *renderer);
|
||||
TESS_API void TessResultRendererInsert(TessResultRenderer *renderer,
|
||||
TessResultRenderer *next);
|
||||
TESS_API TessResultRenderer *TessResultRendererNext(
|
||||
TessResultRenderer *renderer);
|
||||
TESS_API BOOL TessResultRendererBeginDocument(TessResultRenderer *renderer,
|
||||
const char *title);
|
||||
TESS_API BOOL TessResultRendererAddImage(TessResultRenderer *renderer,
|
||||
TessBaseAPI *api);
|
||||
TESS_API BOOL TessResultRendererEndDocument(TessResultRenderer *renderer);
|
||||
|
||||
TESS_API const char *TessResultRendererExtention(TessResultRenderer *renderer);
|
||||
TESS_API const char *TessResultRendererTitle(TessResultRenderer *renderer);
|
||||
TESS_API int TessResultRendererImageNum(TessResultRenderer *renderer);
|
||||
|
||||
/* Base API */
|
||||
|
||||
TESS_API TessBaseAPI *TessBaseAPICreate();
|
||||
TESS_API void TessBaseAPIDelete(TessBaseAPI *handle);
|
||||
|
||||
TESS_API size_t TessBaseAPIGetOpenCLDevice(TessBaseAPI *handle, void **device);
|
||||
|
||||
TESS_API void TessBaseAPISetInputName(TessBaseAPI *handle, const char *name);
|
||||
TESS_API const char *TessBaseAPIGetInputName(TessBaseAPI *handle);
|
||||
|
||||
TESS_API void TessBaseAPISetInputImage(TessBaseAPI *handle, struct Pix *pix);
|
||||
TESS_API struct Pix *TessBaseAPIGetInputImage(TessBaseAPI *handle);
|
||||
|
||||
TESS_API int TessBaseAPIGetSourceYResolution(TessBaseAPI *handle);
|
||||
TESS_API const char *TessBaseAPIGetDatapath(TessBaseAPI *handle);
|
||||
|
||||
TESS_API void TessBaseAPISetOutputName(TessBaseAPI *handle, const char *name);
|
||||
|
||||
TESS_API BOOL TessBaseAPISetVariable(TessBaseAPI *handle, const char *name,
|
||||
const char *value);
|
||||
TESS_API BOOL TessBaseAPISetDebugVariable(TessBaseAPI *handle, const char *name,
|
||||
const char *value);
|
||||
|
||||
TESS_API BOOL TessBaseAPIGetIntVariable(const TessBaseAPI *handle,
|
||||
const char *name, int *value);
|
||||
TESS_API BOOL TessBaseAPIGetBoolVariable(const TessBaseAPI *handle,
|
||||
const char *name, BOOL *value);
|
||||
TESS_API BOOL TessBaseAPIGetDoubleVariable(const TessBaseAPI *handle,
|
||||
const char *name, double *value);
|
||||
TESS_API const char *TessBaseAPIGetStringVariable(const TessBaseAPI *handle,
|
||||
const char *name);
|
||||
|
||||
TESS_API void TessBaseAPIPrintVariables(const TessBaseAPI *handle, FILE *fp);
|
||||
TESS_API BOOL TessBaseAPIPrintVariablesToFile(const TessBaseAPI *handle,
|
||||
const char *filename);
|
||||
|
||||
TESS_API int TessBaseAPIInit1(TessBaseAPI *handle, const char *datapath,
|
||||
const char *language, TessOcrEngineMode oem,
|
||||
char **configs, int configs_size);
|
||||
TESS_API int TessBaseAPIInit2(TessBaseAPI *handle, const char *datapath,
|
||||
const char *language, TessOcrEngineMode oem);
|
||||
TESS_API int TessBaseAPIInit3(TessBaseAPI *handle, const char *datapath,
|
||||
const char *language);
|
||||
|
||||
TESS_API int TessBaseAPIInit4(TessBaseAPI *handle, const char *datapath,
|
||||
const char *language, TessOcrEngineMode mode,
|
||||
char **configs, int configs_size, char **vars_vec,
|
||||
char **vars_values, size_t vars_vec_size,
|
||||
BOOL set_only_non_debug_params);
|
||||
|
||||
TESS_API int TessBaseAPIInit5(TessBaseAPI *handle, const char *data, int data_size,
|
||||
const char *language, TessOcrEngineMode mode,
|
||||
char **configs, int configs_size, char **vars_vec,
|
||||
char **vars_values, size_t vars_vec_size,
|
||||
BOOL set_only_non_debug_params);
|
||||
|
||||
TESS_API const char *TessBaseAPIGetInitLanguagesAsString(
|
||||
const TessBaseAPI *handle);
|
||||
TESS_API char **TessBaseAPIGetLoadedLanguagesAsVector(
|
||||
const TessBaseAPI *handle);
|
||||
TESS_API char **TessBaseAPIGetAvailableLanguagesAsVector(
|
||||
const TessBaseAPI *handle);
|
||||
|
||||
TESS_API void TessBaseAPIInitForAnalysePage(TessBaseAPI *handle);
|
||||
|
||||
TESS_API void TessBaseAPIReadConfigFile(TessBaseAPI *handle,
|
||||
const char *filename);
|
||||
TESS_API void TessBaseAPIReadDebugConfigFile(TessBaseAPI *handle,
|
||||
const char *filename);
|
||||
|
||||
TESS_API void TessBaseAPISetPageSegMode(TessBaseAPI *handle,
|
||||
TessPageSegMode mode);
|
||||
TESS_API TessPageSegMode TessBaseAPIGetPageSegMode(const TessBaseAPI *handle);
|
||||
|
||||
TESS_API char *TessBaseAPIRect(TessBaseAPI *handle,
|
||||
const unsigned char *imagedata,
|
||||
int bytes_per_pixel, int bytes_per_line,
|
||||
int left, int top, int width, int height);
|
||||
|
||||
TESS_API void TessBaseAPIClearAdaptiveClassifier(TessBaseAPI *handle);
|
||||
|
||||
TESS_API void TessBaseAPISetImage(TessBaseAPI *handle,
|
||||
const unsigned char *imagedata, int width,
|
||||
int height, int bytes_per_pixel,
|
||||
int bytes_per_line);
|
||||
TESS_API void TessBaseAPISetImage2(TessBaseAPI *handle, struct Pix *pix);
|
||||
|
||||
TESS_API void TessBaseAPISetSourceResolution(TessBaseAPI *handle, int ppi);
|
||||
|
||||
TESS_API void TessBaseAPISetRectangle(TessBaseAPI *handle, int left, int top,
|
||||
int width, int height);
|
||||
|
||||
TESS_API struct Pix *TessBaseAPIGetThresholdedImage(TessBaseAPI *handle);
|
||||
TESS_API struct Boxa *TessBaseAPIGetRegions(TessBaseAPI *handle,
|
||||
struct Pixa **pixa);
|
||||
TESS_API struct Boxa *TessBaseAPIGetTextlines(TessBaseAPI *handle,
|
||||
struct Pixa **pixa,
|
||||
int **blockids);
|
||||
TESS_API struct Boxa *TessBaseAPIGetTextlines1(TessBaseAPI *handle,
|
||||
BOOL raw_image, int raw_padding,
|
||||
struct Pixa **pixa,
|
||||
int **blockids, int **paraids);
|
||||
TESS_API struct Boxa *TessBaseAPIGetStrips(TessBaseAPI *handle,
|
||||
struct Pixa **pixa, int **blockids);
|
||||
TESS_API struct Boxa *TessBaseAPIGetWords(TessBaseAPI *handle,
|
||||
struct Pixa **pixa);
|
||||
TESS_API struct Boxa *TessBaseAPIGetConnectedComponents(TessBaseAPI *handle,
|
||||
struct Pixa **cc);
|
||||
TESS_API struct Boxa *TessBaseAPIGetComponentImages(TessBaseAPI *handle,
|
||||
TessPageIteratorLevel level,
|
||||
BOOL text_only,
|
||||
struct Pixa **pixa,
|
||||
int **blockids);
|
||||
TESS_API struct Boxa *TessBaseAPIGetComponentImages1(
|
||||
TessBaseAPI *handle, TessPageIteratorLevel level, BOOL text_only,
|
||||
BOOL raw_image, int raw_padding, struct Pixa **pixa, int **blockids,
|
||||
int **paraids);
|
||||
|
||||
TESS_API int TessBaseAPIGetThresholdedImageScaleFactor(
|
||||
const TessBaseAPI *handle);
|
||||
|
||||
TESS_API TessPageIterator *TessBaseAPIAnalyseLayout(TessBaseAPI *handle);
|
||||
|
||||
TESS_API int TessBaseAPIRecognize(TessBaseAPI *handle, ETEXT_DESC *monitor);
|
||||
|
||||
TESS_API BOOL TessBaseAPIProcessPages(TessBaseAPI *handle, const char *filename,
|
||||
const char *retry_config,
|
||||
int timeout_millisec,
|
||||
TessResultRenderer *renderer);
|
||||
TESS_API BOOL TessBaseAPIProcessPage(TessBaseAPI *handle, struct Pix *pix,
|
||||
int page_index, const char *filename,
|
||||
const char *retry_config,
|
||||
int timeout_millisec,
|
||||
TessResultRenderer *renderer);
|
||||
|
||||
TESS_API TessResultIterator *TessBaseAPIGetIterator(TessBaseAPI *handle);
|
||||
TESS_API TessMutableIterator *TessBaseAPIGetMutableIterator(
|
||||
TessBaseAPI *handle);
|
||||
|
||||
TESS_API char *TessBaseAPIGetUTF8Text(TessBaseAPI *handle);
|
||||
TESS_API char *TessBaseAPIGetHOCRText(TessBaseAPI *handle, int page_number);
|
||||
|
||||
TESS_API char *TessBaseAPIGetAltoText(TessBaseAPI *handle, int page_number);
|
||||
TESS_API char *TessBaseAPIGetTsvText(TessBaseAPI *handle, int page_number);
|
||||
|
||||
TESS_API char *TessBaseAPIGetBoxText(TessBaseAPI *handle, int page_number);
|
||||
TESS_API char *TessBaseAPIGetLSTMBoxText(TessBaseAPI *handle, int page_number);
|
||||
TESS_API char *TessBaseAPIGetWordStrBoxText(TessBaseAPI *handle,
|
||||
int page_number);
|
||||
|
||||
TESS_API char *TessBaseAPIGetUNLVText(TessBaseAPI *handle);
|
||||
TESS_API int TessBaseAPIMeanTextConf(TessBaseAPI *handle);
|
||||
|
||||
TESS_API int *TessBaseAPIAllWordConfidences(TessBaseAPI *handle);
|
||||
|
||||
#ifndef DISABLED_LEGACY_ENGINE
|
||||
TESS_API BOOL TessBaseAPIAdaptToWordStr(TessBaseAPI *handle,
|
||||
TessPageSegMode mode,
|
||||
const char *wordstr);
|
||||
#endif // #ifndef DISABLED_LEGACY_ENGINE
|
||||
|
||||
TESS_API void TessBaseAPIClear(TessBaseAPI *handle);
|
||||
TESS_API void TessBaseAPIEnd(TessBaseAPI *handle);
|
||||
|
||||
TESS_API int TessBaseAPIIsValidWord(TessBaseAPI *handle, const char *word);
|
||||
TESS_API BOOL TessBaseAPIGetTextDirection(TessBaseAPI *handle, int *out_offset,
|
||||
float *out_slope);
|
||||
|
||||
TESS_API const char *TessBaseAPIGetUnichar(TessBaseAPI *handle, int unichar_id);
|
||||
|
||||
TESS_API void TessBaseAPIClearPersistentCache(TessBaseAPI *handle);
|
||||
|
||||
#ifndef DISABLED_LEGACY_ENGINE
|
||||
|
||||
// Call TessDeleteText(*script_name) to free memory allocated by this
|
||||
// function
|
||||
TESS_API BOOL TessBaseAPIDetectOrientationScript(TessBaseAPI *handle,
|
||||
int *orient_deg,
|
||||
float *orient_conf,
|
||||
const char **script_name,
|
||||
float *script_conf);
|
||||
#endif // #ifndef DISABLED_LEGACY_ENGINE
|
||||
|
||||
TESS_API void TessBaseAPISetMinOrientationMargin(TessBaseAPI *handle,
|
||||
double margin);
|
||||
|
||||
TESS_API int TessBaseAPINumDawgs(const TessBaseAPI *handle);
|
||||
|
||||
TESS_API TessOcrEngineMode TessBaseAPIOem(const TessBaseAPI *handle);
|
||||
|
||||
TESS_API void TessBaseGetBlockTextOrientations(TessBaseAPI *handle,
|
||||
int **block_orientation,
|
||||
bool **vertical_writing);
|
||||
|
||||
/* Page iterator */
|
||||
|
||||
TESS_API void TessPageIteratorDelete(TessPageIterator *handle);
|
||||
|
||||
TESS_API TessPageIterator *TessPageIteratorCopy(const TessPageIterator *handle);
|
||||
|
||||
TESS_API void TessPageIteratorBegin(TessPageIterator *handle);
|
||||
|
||||
TESS_API BOOL TessPageIteratorNext(TessPageIterator *handle,
|
||||
TessPageIteratorLevel level);
|
||||
|
||||
TESS_API BOOL TessPageIteratorIsAtBeginningOf(const TessPageIterator *handle,
|
||||
TessPageIteratorLevel level);
|
||||
|
||||
TESS_API BOOL TessPageIteratorIsAtFinalElement(const TessPageIterator *handle,
|
||||
TessPageIteratorLevel level,
|
||||
TessPageIteratorLevel element);
|
||||
|
||||
TESS_API BOOL TessPageIteratorBoundingBox(const TessPageIterator *handle,
|
||||
TessPageIteratorLevel level,
|
||||
int *left, int *top, int *right,
|
||||
int *bottom);
|
||||
|
||||
TESS_API TessPolyBlockType
|
||||
TessPageIteratorBlockType(const TessPageIterator *handle);
|
||||
|
||||
TESS_API struct Pix *TessPageIteratorGetBinaryImage(
|
||||
const TessPageIterator *handle, TessPageIteratorLevel level);
|
||||
|
||||
TESS_API struct Pix *TessPageIteratorGetImage(const TessPageIterator *handle,
|
||||
TessPageIteratorLevel level,
|
||||
int padding,
|
||||
struct Pix *original_image,
|
||||
int *left, int *top);
|
||||
|
||||
TESS_API BOOL TessPageIteratorBaseline(const TessPageIterator *handle,
|
||||
TessPageIteratorLevel level, int *x1,
|
||||
int *y1, int *x2, int *y2);
|
||||
|
||||
TESS_API void TessPageIteratorOrientation(
|
||||
TessPageIterator *handle, TessOrientation *orientation,
|
||||
TessWritingDirection *writing_direction, TessTextlineOrder *textline_order,
|
||||
float *deskew_angle);
|
||||
|
||||
TESS_API void TessPageIteratorParagraphInfo(
|
||||
TessPageIterator *handle, TessParagraphJustification *justification,
|
||||
BOOL *is_list_item, BOOL *is_crown, int *first_line_indent);
|
||||
|
||||
/* Result iterator */
|
||||
|
||||
TESS_API void TessResultIteratorDelete(TessResultIterator *handle);
|
||||
TESS_API TessResultIterator *TessResultIteratorCopy(
|
||||
const TessResultIterator *handle);
|
||||
TESS_API TessPageIterator *TessResultIteratorGetPageIterator(
|
||||
TessResultIterator *handle);
|
||||
TESS_API const TessPageIterator *TessResultIteratorGetPageIteratorConst(
|
||||
const TessResultIterator *handle);
|
||||
TESS_API TessChoiceIterator *TessResultIteratorGetChoiceIterator(
|
||||
const TessResultIterator *handle);
|
||||
|
||||
TESS_API BOOL TessResultIteratorNext(TessResultIterator *handle,
|
||||
TessPageIteratorLevel level);
|
||||
TESS_API char *TessResultIteratorGetUTF8Text(const TessResultIterator *handle,
|
||||
TessPageIteratorLevel level);
|
||||
TESS_API float TessResultIteratorConfidence(const TessResultIterator *handle,
|
||||
TessPageIteratorLevel level);
|
||||
TESS_API const char *TessResultIteratorWordRecognitionLanguage(
|
||||
const TessResultIterator *handle);
|
||||
TESS_API const char *TessResultIteratorWordFontAttributes(
|
||||
const TessResultIterator *handle, BOOL *is_bold, BOOL *is_italic,
|
||||
BOOL *is_underlined, BOOL *is_monospace, BOOL *is_serif, BOOL *is_smallcaps,
|
||||
int *pointsize, int *font_id);
|
||||
|
||||
TESS_API BOOL
|
||||
TessResultIteratorWordIsFromDictionary(const TessResultIterator *handle);
|
||||
TESS_API BOOL TessResultIteratorWordIsNumeric(const TessResultIterator *handle);
|
||||
TESS_API BOOL
|
||||
TessResultIteratorSymbolIsSuperscript(const TessResultIterator *handle);
|
||||
TESS_API BOOL
|
||||
TessResultIteratorSymbolIsSubscript(const TessResultIterator *handle);
|
||||
TESS_API BOOL
|
||||
TessResultIteratorSymbolIsDropcap(const TessResultIterator *handle);
|
||||
|
||||
TESS_API void TessChoiceIteratorDelete(TessChoiceIterator *handle);
|
||||
TESS_API BOOL TessChoiceIteratorNext(TessChoiceIterator *handle);
|
||||
TESS_API const char *TessChoiceIteratorGetUTF8Text(
|
||||
const TessChoiceIterator *handle);
|
||||
TESS_API float TessChoiceIteratorConfidence(const TessChoiceIterator *handle);
|
||||
|
||||
/* Progress monitor */
|
||||
|
||||
TESS_API ETEXT_DESC *TessMonitorCreate();
|
||||
TESS_API void TessMonitorDelete(ETEXT_DESC *monitor);
|
||||
TESS_API void TessMonitorSetCancelFunc(ETEXT_DESC *monitor,
|
||||
TessCancelFunc cancelFunc);
|
||||
TESS_API void TessMonitorSetCancelThis(ETEXT_DESC *monitor, void *cancelThis);
|
||||
TESS_API void *TessMonitorGetCancelThis(ETEXT_DESC *monitor);
|
||||
TESS_API void TessMonitorSetProgressFunc(ETEXT_DESC *monitor,
|
||||
TessProgressFunc progressFunc);
|
||||
TESS_API int TessMonitorGetProgress(ETEXT_DESC *monitor);
|
||||
TESS_API void TessMonitorSetDeadlineMSecs(ETEXT_DESC *monitor, int deadline);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // API_CAPI_H_
|
|
@ -1,37 +0,0 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: export.h
|
||||
// Description: Place holder
|
||||
//
|
||||
// (C) Copyright 2006, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef TESSERACT_PLATFORM_H_
|
||||
#define TESSERACT_PLATFORM_H_
|
||||
|
||||
// Selects the definition of TESS_API, the symbol export/import decoration
// used on public declarations. A definition supplied by the build
// (TESS_API already defined) is left untouched.
//
//   Windows / Cygwin:
//     TESS_EXPORTS defined  -> __declspec(dllexport)
//     TESS_IMPORTS defined  -> __declspec(dllimport)
//     neither               -> empty
//   Other platforms:
//     TESS_EXPORTS or TESS_IMPORTS defined
//                           -> __attribute__((visibility("default")))
//     neither               -> empty
#ifndef TESS_API
|
||||
# if defined(_WIN32) || defined(__CYGWIN__)
|
||||
# if defined(TESS_EXPORTS)
|
||||
# define TESS_API __declspec(dllexport)
|
||||
# elif defined(TESS_IMPORTS)
|
||||
# define TESS_API __declspec(dllimport)
|
||||
# else
|
||||
# define TESS_API
|
||||
# endif
|
||||
# else
|
||||
# if defined(TESS_EXPORTS) || defined(TESS_IMPORTS)
|
||||
# define TESS_API __attribute__((visibility("default")))
|
||||
# else
|
||||
# define TESS_API
|
||||
# endif
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#endif // TESSERACT_PLATFORM_H_
|
|
@ -1,235 +0,0 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: ltrresultiterator.h
|
||||
// Description: Iterator for tesseract results in strict left-to-right
|
||||
// order that avoids using tesseract internal data structures.
|
||||
// Author: Ray Smith
|
||||
//
|
||||
// (C) Copyright 2010, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H_
|
||||
#define TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H_
|
||||
|
||||
#include "export.h" // for TESS_API
|
||||
#include "pageiterator.h" // for PageIterator
|
||||
#include "publictypes.h" // for PageIteratorLevel
|
||||
#include "unichar.h" // for StrongScriptDirection
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
class BLOB_CHOICE_IT;
|
||||
class PAGE_RES;
|
||||
class WERD_RES;
|
||||
|
||||
class Tesseract;
|
||||
|
||||
// Class to iterate over tesseract results, providing access to all levels
|
||||
// of the page hierarchy, without including any tesseract headers or having
|
||||
// to handle any tesseract structures.
|
||||
// WARNING! This class points to data held within the TessBaseAPI class, and
|
||||
// therefore can only be used while the TessBaseAPI class still exists and
|
||||
// has not been subjected to a call of Init, SetImage, Recognize, Clear, End,
|
||||
// DetectOS, or anything else that changes the internal PAGE_RES.
|
||||
// See tesseract/publictypes.h for the definition of PageIteratorLevel.
|
||||
// See also base class PageIterator, which contains the bulk of the interface.
|
||||
// LTRResultIterator adds text-specific methods for access to OCR output.
|
||||
|
||||
class TESS_API LTRResultIterator : public PageIterator {
|
||||
// ChoiceIterator is constructed from a LTRResultIterator and is granted
// access to this class's non-public members.
friend class ChoiceIterator;
|
||||
|
||||
public:
|
||||
// page_res and tesseract come directly from the BaseAPI.
|
||||
// The rectangle parameters are copied indirectly from the Thresholder,
|
||||
// via the BaseAPI. They represent the coordinates of some rectangle in an
|
||||
// original image (in top-left-origin coordinates) and therefore the top-left
|
||||
// needs to be added to any output boxes in order to specify coordinates
|
||||
// in the original image. See TessBaseAPI::SetRectangle.
|
||||
// The scale and scaled_yres are in case the Thresholder scaled the image
|
||||
// rectangle prior to thresholding. Any coordinates in tesseract's image
|
||||
// must be divided by scale before adding (rect_left, rect_top).
|
||||
// The scaled_yres indicates the effective resolution of the binary image
|
||||
// that tesseract has been given by the Thresholder.
|
||||
// After the constructor, Begin has already been called.
|
||||
LTRResultIterator(PAGE_RES *page_res, Tesseract *tesseract, int scale,
|
||||
int scaled_yres, int rect_left, int rect_top,
|
||||
int rect_width, int rect_height);
|
||||
|
||||
~LTRResultIterator() override;
|
||||
|
||||
// LTRResultIterators may be copied! This makes it possible to iterate over
|
||||
// all the objects at a lower level, while maintaining an iterator to
|
||||
// objects at a higher level. These constructors DO NOT CALL Begin, so
|
||||
// iterations will continue from the location of src.
|
||||
// TODO: For now the copy constructor and operator= only need the base class
|
||||
// versions, but if new data members are added, don't forget to add them!
|
||||
|
||||
// ============= Moving around within the page ============.
|
||||
|
||||
// See PageIterator.
|
||||
|
||||
// ============= Accessing data ==============.
|
||||
|
||||
// Returns the null terminated UTF-8 encoded text string for the current
|
||||
// object at the given level. Use delete [] to free after use.
|
||||
char *GetUTF8Text(PageIteratorLevel level) const;
|
||||
|
||||
// Set the string inserted at the end of each text line. "\n" by default.
|
||||
void SetLineSeparator(const char *new_line);
|
||||
|
||||
// Set the string inserted at the end of each paragraph. "\n" by default.
|
||||
void SetParagraphSeparator(const char *new_para);
|
||||
|
||||
// Returns the mean confidence of the current object at the given level.
|
||||
// The number should be interpreted as a percent probability. (0.0f-100.0f)
|
||||
float Confidence(PageIteratorLevel level) const;
|
||||
|
||||
// ============= Functions that refer to words only ============.
|
||||
|
||||
// Returns the font attributes of the current word. If iterating at a higher
|
||||
// level object than words, eg textlines, then this will return the
|
||||
// attributes of the first word in that textline.
|
||||
// The actual return value is a string representing a font name. It points
|
||||
// to an internal table and SHOULD NOT BE DELETED. Lifespan is the same as
|
||||
// the iterator itself, ie rendered invalid by various members of
|
||||
// TessBaseAPI, including Init, SetImage, End or deleting the TessBaseAPI.
|
||||
// Pointsize is returned in printers points (1/72 inch.)
|
||||
const char *WordFontAttributes(bool *is_bold, bool *is_italic,
|
||||
bool *is_underlined, bool *is_monospace,
|
||||
bool *is_serif, bool *is_smallcaps,
|
||||
int *pointsize, int *font_id) const;
|
||||
|
||||
// Return the name of the language used to recognize this word.
|
||||
// On error, nullptr. Do not delete this pointer.
|
||||
const char *WordRecognitionLanguage() const;
|
||||
|
||||
// Return the overall directionality of this word.
|
||||
StrongScriptDirection WordDirection() const;
|
||||
|
||||
// Returns true if the current word was found in a dictionary.
|
||||
bool WordIsFromDictionary() const;
|
||||
|
||||
// Returns the number of blanks before the current word.
|
||||
int BlanksBeforeWord() const;
|
||||
|
||||
// Returns true if the current word is numeric.
|
||||
bool WordIsNumeric() const;
|
||||
|
||||
// Returns true if the word contains blamer information.
|
||||
bool HasBlamerInfo() const;
|
||||
|
||||
// Returns the pointer to ParamsTrainingBundle stored in the BlamerBundle
|
||||
// of the current word.
|
||||
const void *GetParamsTrainingBundle() const;
|
||||
|
||||
// Returns a pointer to the string with blamer information for this word.
|
||||
// Assumes that the word's blamer_bundle is not nullptr.
|
||||
const char *GetBlamerDebug() const;
|
||||
|
||||
// Returns a pointer to the string with misadaption information for this word.
|
||||
// Assumes that the word's blamer_bundle is not nullptr.
|
||||
const char *GetBlamerMisadaptionDebug() const;
|
||||
|
||||
// Returns true if a truth string was recorded for the current word.
|
||||
bool HasTruthString() const;
|
||||
|
||||
// Returns true if the given string is equivalent to the truth string for
|
||||
// the current word.
|
||||
bool EquivalentToTruth(const char *str) const;
|
||||
|
||||
// Returns a null terminated UTF-8 encoded truth string for the current word.
|
||||
// Use delete [] to free after use.
|
||||
char *WordTruthUTF8Text() const;
|
||||
|
||||
// Returns a null terminated UTF-8 encoded normalized OCR string for the
|
||||
// current word. Use delete [] to free after use.
|
||||
char *WordNormedUTF8Text() const;
|
||||
|
||||
// Returns a pointer to serialized choice lattice.
|
||||
// Fills lattice_size with the number of bytes in lattice data.
|
||||
const char *WordLattice(int *lattice_size) const;
|
||||
|
||||
// ============= Functions that refer to symbols only ============.
|
||||
|
||||
// Returns true if the current symbol is a superscript.
|
||||
// If iterating at a higher level object than symbols, eg words, then
|
||||
// this will return the attributes of the first symbol in that word.
|
||||
bool SymbolIsSuperscript() const;
|
||||
// Returns true if the current symbol is a subscript.
|
||||
// If iterating at a higher level object than symbols, eg words, then
|
||||
// this will return the attributes of the first symbol in that word.
|
||||
bool SymbolIsSubscript() const;
|
||||
// Returns true if the current symbol is a dropcap.
|
||||
// If iterating at a higher level object than symbols, eg words, then
|
||||
// this will return the attributes of the first symbol in that word.
|
||||
bool SymbolIsDropcap() const;
|
||||
|
||||
protected:
|
||||
// Separator strings; presumably the values installed by SetLineSeparator
// and SetParagraphSeparator ("\n" by default per their comments).
// NOTE(review): ownership of the pointed-to strings is not visible in this
// header -- confirm in the implementation before freeing or retaining them.
const char *line_separator_;
|
||||
const char *paragraph_separator_;
|
||||
};
|
||||
|
||||
// Class to iterate over the classifier choices for a single RIL_SYMBOL.
|
||||
class TESS_API ChoiceIterator {
|
||||
public:
|
||||
// Construction is from a LTRResultIterator that points to the symbol of
|
||||
// interest. The ChoiceIterator allows a one-shot iteration over the
|
||||
// choices for this symbol and after that it is useless.
|
||||
explicit ChoiceIterator(const LTRResultIterator &result_it);
|
||||
~ChoiceIterator();
|
||||
|
||||
// Moves to the next choice for the symbol and returns false if there
|
||||
// are none left.
|
||||
bool Next();
|
||||
|
||||
// ============= Accessing data ==============.
|
||||
|
||||
// Returns the null terminated UTF-8 encoded text string for the current
|
||||
// choice.
|
||||
// NOTE: Unlike LTRResultIterator::GetUTF8Text, the return points to an
|
||||
// internal structure and should NOT be delete[]ed to free after use.
|
||||
const char *GetUTF8Text() const;
|
||||
|
||||
// Returns the confidence of the current choice depending on the used language
|
||||
// data. If only LSTM traineddata is used the value range is 0.0f - 1.0f. All
|
||||
// choices for one symbol should roughly add up to 1.0f.
|
||||
// If only traineddata of the legacy engine is used, the number should be
|
||||
// interpreted as a percent probability. (0.0f-100.0f) In this case
|
||||
// probabilities won't add up to 100. Each one stands on its own.
|
||||
float Confidence() const;
|
||||
|
||||
// Returns a vector containing all timesteps, which belong to the currently
|
||||
// selected symbol. A timestep is a vector containing pairs of symbols and
|
||||
// floating point numbers. The number states the probability for the
|
||||
// corresponding symbol.
|
||||
std::vector<std::vector<std::pair<const char *, float>>> *Timesteps() const;
|
||||
|
||||
private:
|
||||
// Clears the remaining spaces out of the results and adapts the probabilities.
|
||||
void filterSpaces();
|
||||
// Pointer to the WERD_RES object owned by the API.
|
||||
WERD_RES *word_res_;
|
||||
// Iterator over the blob choices.
|
||||
BLOB_CHOICE_IT *choice_it_;
|
||||
// (symbol, float) pairs and an iterator into them.
// NOTE(review): presumably the LSTM choices for the current symbol and the
// current position among them; ownership is not visible here -- confirm.
std::vector<std::pair<const char *, float>> *LSTM_choices_ = nullptr;
|
||||
std::vector<std::pair<const char *, float>>::iterator LSTM_choice_it_;
|
||||
|
||||
// NOTE(review): purpose not documented in this header; presumably an index
// into the timestep data -- confirm in the implementation.
const int *tstep_index_;
|
||||
// Regulates the rating granularity.
|
||||
double rating_coefficient_;
|
||||
// Leading blanks.
|
||||
int blanks_before_word_;
|
||||
// True when there is LSTM engine related trained data.
|
||||
bool oemLSTM_;
|
||||
};
|
||||
|
||||
} // namespace tesseract.
|
||||
|
||||
#endif // TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H_
|
|
@ -1,158 +0,0 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
/**********************************************************************
|
||||
* File: ocrclass.h
|
||||
* Description: Class definitions and constants for the OCR API.
|
||||
* Author: Hewlett-Packard Co
|
||||
*
|
||||
* (C) Copyright 1996, Hewlett-Packard Co.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
/**********************************************************************
|
||||
* This file contains typedefs for all the structures used by
|
||||
* the HP OCR interface.
|
||||
* The structures are designed to allow them to be used with any
|
||||
* structure alignment up to 8.
|
||||
**********************************************************************/
|
||||
|
||||
#ifndef CCUTIL_OCRCLASS_H_
|
||||
#define CCUTIL_OCRCLASS_H_
|
||||
|
||||
#include <chrono>
|
||||
#include <ctime>
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
/**********************************************************************
|
||||
* EANYCODE_CHAR
|
||||
* Description of a single character. The character code is defined by
|
||||
* the character set of the current font.
|
||||
* Output text is sent as an array of these structures.
|
||||
* Spaces and line endings in the output are represented in the
|
||||
* structures of the surrounding characters. They are not directly
|
||||
* represented as characters.
|
||||
* The first character in a word has a positive value of blanks.
|
||||
* Missing information should be set to the defaults in the comments.
|
||||
* If word bounds are known, but not character bounds, then the top and
|
||||
* bottom of each character should be those of the word. The left of the
|
||||
* first and right of the last char in each word should be set. All other
|
||||
* lefts and rights should be set to -1.
|
||||
* If set, the values of right and bottom are left+width and top+height.
|
||||
* Most of the members come directly from the parameters to ocr_append_char.
|
||||
* The formatting member uses the enhancement parameter and combines the
|
||||
* line direction stuff into the top 3 bits.
|
||||
* The coding is 0=RL char, 1=LR char, 2=DR NL, 3=UL NL, 4=DR Para,
|
||||
* 5=UL Para, 6=TB char, 7=BT char. API users do not need to know what
|
||||
* the coding is, only that it is backwards compatible with the previous
|
||||
* version.
|
||||
**********************************************************************/
|
||||
|
||||
// Description of one output character. The parenthesised value at the end of
// each member comment is the default to use when the information is missing.
struct EANYCODE_CHAR { /*single character */
|
||||
// It should be noted that the format for char_code for version 2.0 and beyond
|
||||
// is UTF8 which means that ASCII characters will come out as one structure
|
||||
// but other characters will be returned in two or more instances of this
|
||||
// structure with a single byte of the UTF8 code in each, but each will have
|
||||
// the same bounding box. Programs which want to handle languages with
|
||||
// different character sets will need to handle extended characters
|
||||
// appropriately, but *all* code needs to be prepared to receive UTF8 coded
|
||||
// characters for characters such as bullet and fancy quotes.
|
||||
uint16_t char_code; /*character itself */
|
||||
int16_t left; /*of char (-1) */
|
||||
int16_t right; /*of char (-1) */
|
||||
int16_t top; /*of char (-1) */
|
||||
int16_t bottom; /*of char (-1) */
|
||||
int16_t font_index; /*what font (0) */
|
||||
uint8_t confidence; /*0=perfect, 100=reject (0/100) */
|
||||
uint8_t point_size; /*of char, 72=1 inch, (10) */
|
||||
int8_t blanks; /*no of spaces before this char (1) */
|
||||
uint8_t formatting; /*char formatting (0) */
|
||||
};
|
||||
|
||||
/**********************************************************************
|
||||
* ETEXT_DESC
|
||||
* Description of the output of the OCR engine.
|
||||
* This structure is used as both a progress monitor and the final
|
||||
* output header, since it needs to be a valid progress monitor while
|
||||
* the OCR engine is storing its output to shared memory.
|
||||
* During progress, all the buffer info is -1.
|
||||
* Progress starts at 0 and increases to 100 during OCR. No other constraint.
|
||||
* Additionally the progress callback contains the bounding box of the word that
|
||||
* is currently being processed.
|
||||
* Every progress callback, the OCR engine must set ocr_alive to 1.
|
||||
* The HP side will set ocr_alive to 0. Repeated failure to reset
|
||||
* to 1 indicates that the OCR engine is dead.
|
||||
* If the cancel function is not null then it is called with the number of
|
||||
* user words found. If it returns true then operation is cancelled.
|
||||
**********************************************************************/
|
||||
class ETEXT_DESC;
|
||||
|
||||
using CANCEL_FUNC = bool (*)(void *, int);
|
||||
using PROGRESS_FUNC = bool (*)(int, int, int, int, int);
|
||||
using PROGRESS_FUNC2 = bool (*)(ETEXT_DESC *, int, int, int, int);
|
||||
|
||||
class ETEXT_DESC { // output header
|
||||
public:
|
||||
int16_t count{0}; /// chars in this buffer(0)
|
||||
int16_t progress{0}; /// percent complete increasing (0-100)
|
||||
/** Progress monitor covers word recognition and it does not cover layout
|
||||
* analysis.
|
||||
* See Ray comment in https://github.com/tesseract-ocr/tesseract/pull/27 */
|
||||
int8_t more_to_come{0}; /// true if not last
|
||||
volatile int8_t ocr_alive{0}; /// ocr sets to 1, HP 0
|
||||
int8_t err_code{0}; /// for errcode use
|
||||
CANCEL_FUNC cancel{nullptr}; /// returns true to cancel
|
||||
PROGRESS_FUNC progress_callback{
|
||||
nullptr}; /// called whenever progress increases
|
||||
PROGRESS_FUNC2 progress_callback2; /// monitor-aware progress callback
|
||||
void *cancel_this{nullptr}; /// this or other data for cancel
|
||||
std::chrono::steady_clock::time_point end_time;
|
||||
/// Time to stop. Expected to be set only
|
||||
/// by call to set_deadline_msecs().
|
||||
EANYCODE_CHAR text[1]{}; /// character data
|
||||
|
||||
ETEXT_DESC() : progress_callback2(&default_progress_func) {
|
||||
end_time = std::chrono::time_point<std::chrono::steady_clock,
|
||||
std::chrono::milliseconds>();
|
||||
}
|
||||
|
||||
// Sets the end time to be deadline_msecs milliseconds from now.
|
||||
void set_deadline_msecs(int32_t deadline_msecs) {
|
||||
if (deadline_msecs > 0) {
|
||||
end_time = std::chrono::steady_clock::now() +
|
||||
std::chrono::milliseconds(deadline_msecs);
|
||||
}
|
||||
}
|
||||
|
||||
// Returns false if we've not passed the end_time, or have not set a deadline.
|
||||
bool deadline_exceeded() const {
|
||||
if (end_time.time_since_epoch() ==
|
||||
std::chrono::steady_clock::duration::zero()) {
|
||||
return false;
|
||||
}
|
||||
auto now = std::chrono::steady_clock::now();
|
||||
return (now > end_time);
|
||||
}
|
||||
|
||||
private:
|
||||
static bool default_progress_func(ETEXT_DESC *ths, int left, int right,
|
||||
int top, int bottom) {
|
||||
if (ths->progress_callback != nullptr) {
|
||||
return (*(ths->progress_callback))(ths->progress, left, right, top,
|
||||
bottom);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace tesseract
|
||||
|
||||
#endif // CCUTIL_OCRCLASS_H_
|
|
@ -1,139 +0,0 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: osdetect.h
|
||||
// Description: Orientation and script detection.
|
||||
// Author: Samuel Charron
|
||||
// Ranjith Unnikrishnan
|
||||
//
|
||||
// (C) Copyright 2008, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef TESSERACT_CCMAIN_OSDETECT_H_
|
||||
#define TESSERACT_CCMAIN_OSDETECT_H_
|
||||
|
||||
#include "export.h" // for TESS_API
|
||||
|
||||
#include <vector> // for std::vector
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
class BLOBNBOX;
|
||||
class BLOBNBOX_CLIST;
|
||||
class BLOB_CHOICE_LIST;
|
||||
class TO_BLOCK_LIST;
|
||||
class UNICHARSET;
|
||||
|
||||
class Tesseract;
|
||||
|
||||
// Upper bound on the number of scripts: the scripts in ICU, plus "NULL",
// plus Japanese and Korean, plus Fraktur.
const int kMaxNumberOfScripts = 116   // scripts in ICU
                                + 1   // "NULL"
                                + 2   // Japanese and Korean
                                + 1;  // Fraktur
|
||||
|
||||
// Best orientation/script pair together with a confidence value for each.
// NOTE(review): sconfidence / oconfidence are presumably the script and
// orientation confidences respectively — confirm against the implementation.
struct OSBestResult {
  int orientation_id;
  int script_id;
  float sconfidence;
  float oconfidence;

  // Starts with both ids at 0 and both confidences at 0.0.
  OSBestResult()
      : orientation_id(0),
        script_id(0),
        sconfidence(0.0),
        oconfidence(0.0) {}
};
|
||||
|
||||
struct OSResults {
|
||||
OSResults() : unicharset(nullptr) {
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
for (int j = 0; j < kMaxNumberOfScripts; ++j) {
|
||||
scripts_na[i][j] = 0;
|
||||
}
|
||||
orientations[i] = 0;
|
||||
}
|
||||
}
|
||||
void update_best_orientation();
|
||||
// Set the estimate of the orientation to the given id.
|
||||
void set_best_orientation(int orientation_id);
|
||||
// Update/Compute the best estimate of the script assuming the given
|
||||
// orientation id.
|
||||
void update_best_script(int orientation_id);
|
||||
// Return the index of the script with the highest score for this orientation.
|
||||
TESS_API int get_best_script(int orientation_id) const;
|
||||
// Accumulate scores with given OSResults instance and update the best script.
|
||||
void accumulate(const OSResults &osr);
|
||||
|
||||
// Print statistics.
|
||||
void print_scores(void) const;
|
||||
void print_scores(int orientation_id) const;
|
||||
|
||||
// Array holding scores for each orientation id [0,3].
|
||||
// Orientation ids [0..3] map to [0, 270, 180, 90] degree orientations of the
|
||||
// page respectively, where the values refer to the amount of clockwise
|
||||
// rotation to be applied to the page for the text to be upright and readable.
|
||||
float orientations[4];
|
||||
// Script confidence scores for each of 4 possible orientations.
|
||||
float scripts_na[4][kMaxNumberOfScripts];
|
||||
|
||||
UNICHARSET *unicharset;
|
||||
OSBestResult best_result;
|
||||
};
|
||||
|
||||
class OrientationDetector {
|
||||
public:
|
||||
OrientationDetector(const std::vector<int> *allowed_scripts,
|
||||
OSResults *results);
|
||||
bool detect_blob(BLOB_CHOICE_LIST *scores);
|
||||
int get_orientation();
|
||||
|
||||
private:
|
||||
OSResults *osr_;
|
||||
const std::vector<int> *allowed_scripts_;
|
||||
};
|
||||
|
||||
class ScriptDetector {
|
||||
public:
|
||||
ScriptDetector(const std::vector<int> *allowed_scripts, OSResults *osr,
|
||||
tesseract::Tesseract *tess);
|
||||
void detect_blob(BLOB_CHOICE_LIST *scores);
|
||||
bool must_stop(int orientation) const;
|
||||
|
||||
private:
|
||||
OSResults *osr_;
|
||||
static const char *korean_script_;
|
||||
static const char *japanese_script_;
|
||||
static const char *fraktur_script_;
|
||||
int korean_id_;
|
||||
int japanese_id_;
|
||||
int katakana_id_;
|
||||
int hiragana_id_;
|
||||
int han_id_;
|
||||
int hangul_id_;
|
||||
int latin_id_;
|
||||
int fraktur_id_;
|
||||
tesseract::Tesseract *tess_;
|
||||
const std::vector<int> *allowed_scripts_;
|
||||
};
|
||||
|
||||
int orientation_and_script_detection(const char *filename, OSResults *,
|
||||
tesseract::Tesseract *);
|
||||
|
||||
int os_detect(TO_BLOCK_LIST *port_blocks, OSResults *osr,
|
||||
tesseract::Tesseract *tess);
|
||||
|
||||
int os_detect_blobs(const std::vector<int> *allowed_scripts,
|
||||
BLOBNBOX_CLIST *blob_list, OSResults *osr,
|
||||
tesseract::Tesseract *tess);
|
||||
|
||||
bool os_detect_blob(BLOBNBOX *bbox, OrientationDetector *o, ScriptDetector *s,
|
||||
OSResults *, tesseract::Tesseract *tess);
|
||||
|
||||
// Helper method to convert an orientation index to its value in degrees.
|
||||
// The value represents the amount of clockwise rotation in degrees that must be
|
||||
// applied for the text to be upright (readable).
|
||||
TESS_API int OrientationIdToValue(const int &id);
|
||||
|
||||
} // namespace tesseract
|
||||
|
||||
#endif // TESSERACT_CCMAIN_OSDETECT_H_
|
|
@ -1,364 +0,0 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: pageiterator.h
|
||||
// Description: Iterator for tesseract page structure that avoids using
|
||||
// tesseract internal data structures.
|
||||
// Author: Ray Smith
|
||||
//
|
||||
// (C) Copyright 2010, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef TESSERACT_CCMAIN_PAGEITERATOR_H_
|
||||
#define TESSERACT_CCMAIN_PAGEITERATOR_H_
|
||||
|
||||
#include "export.h"
|
||||
#include "publictypes.h"
|
||||
|
||||
struct Pix;
|
||||
struct Pta;
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
struct BlamerBundle;
|
||||
class C_BLOB_IT;
|
||||
class PAGE_RES;
|
||||
class PAGE_RES_IT;
|
||||
class WERD;
|
||||
|
||||
class Tesseract;
|
||||
|
||||
/**
|
||||
* Class to iterate over tesseract page structure, providing access to all
|
||||
* levels of the page hierarchy, without including any tesseract headers or
|
||||
* having to handle any tesseract structures.
|
||||
* WARNING! This class points to data held within the TessBaseAPI class, and
|
||||
* therefore can only be used while the TessBaseAPI class still exists and
|
||||
* has not been subjected to a call of Init, SetImage, Recognize, Clear, End,
|
||||
* DetectOS, or anything else that changes the internal PAGE_RES.
|
||||
* See tesseract/publictypes.h for the definition of PageIteratorLevel.
|
||||
* See also ResultIterator, derived from PageIterator, which adds in the
|
||||
* ability to access OCR output with text-specific methods.
|
||||
*/
|
||||
|
||||
class TESS_API PageIterator {
|
||||
public:
|
||||
/**
|
||||
* page_res and tesseract come directly from the BaseAPI.
|
||||
* The rectangle parameters are copied indirectly from the Thresholder,
|
||||
* via the BaseAPI. They represent the coordinates of some rectangle in an
|
||||
* original image (in top-left-origin coordinates) and therefore the top-left
|
||||
* needs to be added to any output boxes in order to specify coordinates
|
||||
* in the original image. See TessBaseAPI::SetRectangle.
|
||||
* The scale and scaled_yres are in case the Thresholder scaled the image
|
||||
* rectangle prior to thresholding. Any coordinates in tesseract's image
|
||||
* must be divided by scale before adding (rect_left, rect_top).
|
||||
* The scaled_yres indicates the effective resolution of the binary image
|
||||
* that tesseract has been given by the Thresholder.
|
||||
* After the constructor, Begin has already been called.
|
||||
*/
|
||||
PageIterator(PAGE_RES *page_res, Tesseract *tesseract, int scale,
|
||||
int scaled_yres, int rect_left, int rect_top, int rect_width,
|
||||
int rect_height);
|
||||
virtual ~PageIterator();
|
||||
|
||||
/**
|
||||
* Page/ResultIterators may be copied! This makes it possible to iterate over
|
||||
* all the objects at a lower level, while maintaining an iterator to
|
||||
* objects at a higher level. These constructors DO NOT CALL Begin, so
|
||||
* iterations will continue from the location of src.
|
||||
*/
|
||||
PageIterator(const PageIterator &src);
|
||||
const PageIterator &operator=(const PageIterator &src);
|
||||
|
||||
/** Are we positioned at the same location as other? */
|
||||
bool PositionedAtSameWord(const PAGE_RES_IT *other) const;
|
||||
|
||||
// ============= Moving around within the page ============.
|
||||
|
||||
/**
|
||||
* Moves the iterator to point to the start of the page to begin an
|
||||
* iteration.
|
||||
*/
|
||||
virtual void Begin();
|
||||
|
||||
/**
|
||||
* Moves the iterator to the beginning of the paragraph.
|
||||
* This class implements this functionality by moving it to the zero indexed
|
||||
* blob of the first (leftmost) word on the first row of the paragraph.
|
||||
*/
|
||||
virtual void RestartParagraph();
|
||||
|
||||
/**
|
||||
* Return whether this iterator points anywhere in the first textline of a
|
||||
* paragraph.
|
||||
*/
|
||||
bool IsWithinFirstTextlineOfParagraph() const;
|
||||
|
||||
/**
|
||||
* Moves the iterator to the beginning of the text line.
|
||||
* This class implements this functionality by moving it to the zero indexed
|
||||
* blob of the first (leftmost) word of the row.
|
||||
*/
|
||||
virtual void RestartRow();
|
||||
|
||||
/**
|
||||
* Moves to the start of the next object at the given level in the
|
||||
* page hierarchy, and returns false if the end of the page was reached.
|
||||
* NOTE that RIL_SYMBOL will skip non-text blocks, but all other
|
||||
* PageIteratorLevel level values will visit each non-text block once.
|
||||
* Think of non text blocks as containing a single para, with a single line,
|
||||
* with a single imaginary word.
|
||||
* Calls to Next with different levels may be freely intermixed.
|
||||
* This function iterates words in right-to-left scripts correctly, if
|
||||
* the appropriate language has been loaded into Tesseract.
|
||||
*/
|
||||
virtual bool Next(PageIteratorLevel level);
|
||||
|
||||
/**
|
||||
* Returns true if the iterator is at the start of an object at the given
|
||||
* level.
|
||||
*
|
||||
* For instance, suppose an iterator it is pointed to the first symbol of the
|
||||
* first word of the third line of the second paragraph of the first block in
|
||||
* a page, then:
|
||||
* it.IsAtBeginningOf(RIL_BLOCK) = false
|
||||
* it.IsAtBeginningOf(RIL_PARA) = false
|
||||
* it.IsAtBeginningOf(RIL_TEXTLINE) = true
|
||||
* it.IsAtBeginningOf(RIL_WORD) = true
|
||||
* it.IsAtBeginningOf(RIL_SYMBOL) = true
|
||||
*/
|
||||
virtual bool IsAtBeginningOf(PageIteratorLevel level) const;
|
||||
|
||||
/**
|
||||
* Returns whether the iterator is positioned at the last element in a
|
||||
* given level. (e.g. the last word in a line, the last line in a block)
|
||||
*
|
||||
* Here's some two-paragraph example
|
||||
* text. It starts off innocuously
|
||||
* enough but quickly turns bizarre.
|
||||
* The author inserts a cornucopia
|
||||
* of words to guard against confused
|
||||
* references.
|
||||
*
|
||||
* Now take an iterator it pointed to the start of "bizarre."
|
||||
* it.IsAtFinalElement(RIL_PARA, RIL_SYMBOL) = false
|
||||
* it.IsAtFinalElement(RIL_PARA, RIL_WORD) = true
|
||||
* it.IsAtFinalElement(RIL_BLOCK, RIL_WORD) = false
|
||||
*/
|
||||
virtual bool IsAtFinalElement(PageIteratorLevel level,
|
||||
PageIteratorLevel element) const;
|
||||
|
||||
/**
|
||||
* Returns whether this iterator is positioned
|
||||
* before other: -1
|
||||
* equal to other: 0
|
||||
* after other: 1
|
||||
*/
|
||||
int Cmp(const PageIterator &other) const;
|
||||
|
||||
// ============= Accessing data ==============.
|
||||
// Coordinate system:
|
||||
// Integer coordinates are at the cracks between the pixels.
|
||||
// The top-left corner of the top-left pixel in the image is at (0,0).
|
||||
// The bottom-right corner of the bottom-right pixel in the image is at
|
||||
// (width, height).
|
||||
// Every bounding box goes from the top-left of the top-left contained
|
||||
// pixel to the bottom-right of the bottom-right contained pixel, so
|
||||
// the bounding box of the single top-left pixel in the image is:
|
||||
// (0,0)->(1,1).
|
||||
// If an image rectangle has been set in the API, then returned coordinates
|
||||
// relate to the original (full) image, rather than the rectangle.
|
||||
|
||||
/**
|
||||
* Controls what to include in a bounding box. Bounding boxes of all levels
|
||||
* between RIL_WORD and RIL_BLOCK can include or exclude potential diacritics.
|
||||
* Between layout analysis and recognition, it isn't known where all
|
||||
* diacritics belong, so this control is used to include or exclude some
|
||||
* diacritics that are above or below the main body of the word. In most cases
|
||||
* where the placement is obvious, and after recognition, it doesn't make as
|
||||
* much difference, as the diacritics will already be included in the word.
|
||||
*/
|
||||
void SetBoundingBoxComponents(bool include_upper_dots,
|
||||
bool include_lower_dots) {
|
||||
include_upper_dots_ = include_upper_dots;
|
||||
include_lower_dots_ = include_lower_dots;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the bounding rectangle of the current object at the given level.
|
||||
* See comment on coordinate system above.
|
||||
* Returns false if there is no such object at the current position.
|
||||
* The returned bounding box is guaranteed to match the size and position
|
||||
* of the image returned by GetBinaryImage, but may clip foreground pixels
|
||||
* from a grey image. The padding argument to GetImage can be used to expand
|
||||
* the image to include more foreground pixels. See GetImage below.
|
||||
*/
|
||||
bool BoundingBox(PageIteratorLevel level, int *left, int *top, int *right,
|
||||
int *bottom) const;
|
||||
bool BoundingBox(PageIteratorLevel level, int padding, int *left, int *top,
|
||||
int *right, int *bottom) const;
|
||||
/**
|
||||
* Returns the bounding rectangle of the object in a coordinate system of the
|
||||
* working image rectangle having its origin at (rect_left_, rect_top_) with
|
||||
* respect to the original image and is scaled by a factor scale_.
|
||||
*/
|
||||
bool BoundingBoxInternal(PageIteratorLevel level, int *left, int *top,
|
||||
int *right, int *bottom) const;
|
||||
|
||||
/** Returns whether there is no object of a given level. */
|
||||
bool Empty(PageIteratorLevel level) const;
|
||||
|
||||
/**
|
||||
* Returns the type of the current block.
|
||||
* See tesseract/publictypes.h for PolyBlockType.
|
||||
*/
|
||||
PolyBlockType BlockType() const;
|
||||
|
||||
/**
|
||||
* Returns the polygon outline of the current block. The returned Pta must
|
||||
* be ptaDestroy-ed after use. Note that the returned Pta lists the vertices
|
||||
* of the polygon, and the last edge is the line segment between the last
|
||||
* point and the first point. nullptr will be returned if the iterator is
|
||||
* at the end of the document or layout analysis was not used.
|
||||
*/
|
||||
Pta *BlockPolygon() const;
|
||||
|
||||
/**
|
||||
* Returns a binary image of the current object at the given level.
|
||||
* The position and size match the return from BoundingBoxInternal, and so
|
||||
* this could be upscaled with respect to the original input image.
|
||||
* Use pixDestroy to delete the image after use.
|
||||
*/
|
||||
Pix *GetBinaryImage(PageIteratorLevel level) const;
|
||||
|
||||
/**
|
||||
* Returns an image of the current object at the given level in greyscale
|
||||
* if available in the input. To guarantee a binary image use BinaryImage.
|
||||
* NOTE that in order to give the best possible image, the bounds are
|
||||
* expanded slightly over the binary connected component, by the supplied
|
||||
* padding, so the top-left position of the returned image is returned
|
||||
* in (left,top). These will most likely not match the coordinates
|
||||
* returned by BoundingBox.
|
||||
* If you do not supply an original image, you will get a binary one.
|
||||
* Use pixDestroy to delete the image after use.
|
||||
*/
|
||||
Pix *GetImage(PageIteratorLevel level, int padding, Pix *original_img,
|
||||
int *left, int *top) const;
|
||||
|
||||
/**
|
||||
* Returns the baseline of the current object at the given level.
|
||||
* The baseline is the line that passes through (x1, y1) and (x2, y2).
|
||||
* WARNING: with vertical text, baselines may be vertical!
|
||||
* Returns false if there is no baseline at the current position.
|
||||
*/
|
||||
bool Baseline(PageIteratorLevel level, int *x1, int *y1, int *x2,
|
||||
int *y2) const;
|
||||
|
||||
// Returns the attributes of the current row.
|
||||
void RowAttributes(float *row_height, float *descenders,
|
||||
float *ascenders) const;
|
||||
|
||||
/**
|
||||
* Returns orientation for the block the iterator points to.
|
||||
* orientation, writing_direction, textline_order: see publictypes.h
|
||||
* deskew_angle: after rotating the block so the text orientation is
|
||||
* upright, how many radians does one have to rotate the
|
||||
* block anti-clockwise for it to be level?
|
||||
* -Pi/4 <= deskew_angle <= Pi/4
|
||||
*/
|
||||
void Orientation(tesseract::Orientation *orientation,
|
||||
tesseract::WritingDirection *writing_direction,
|
||||
tesseract::TextlineOrder *textline_order,
|
||||
float *deskew_angle) const;
|
||||
|
||||
/**
|
||||
* Returns information about the current paragraph, if available.
|
||||
*
|
||||
* justification -
|
||||
* LEFT if ragged right, or fully justified and script is left-to-right.
|
||||
* RIGHT if ragged left, or fully justified and script is right-to-left.
|
||||
* unknown if it looks like source code or we have very few lines.
|
||||
* is_list_item -
|
||||
* true if we believe this is a member of an ordered or unordered list.
|
||||
* is_crown -
|
||||
* true if the first line of the paragraph is aligned with the other
|
||||
* lines of the paragraph even though subsequent paragraphs have first
|
||||
* line indents. This typically indicates that this is the continuation
|
||||
* of a previous paragraph or that it is the very first paragraph in
|
||||
* the chapter.
|
||||
* first_line_indent -
|
||||
* For LEFT aligned paragraphs, the first text line of paragraphs of
|
||||
* this kind are indented this many pixels from the left edge of the
|
||||
* rest of the paragraph.
|
||||
* for RIGHT aligned paragraphs, the first text line of paragraphs of
|
||||
* this kind are indented this many pixels from the right edge of the
|
||||
* rest of the paragraph.
|
||||
* NOTE 1: This value may be negative.
|
||||
* NOTE 2: if *is_crown == true, the first line of this paragraph is
|
||||
* actually flush, and first_line_indent is set to the "common"
|
||||
* first_line_indent for subsequent paragraphs in this block
|
||||
* of text.
|
||||
*/
|
||||
void ParagraphInfo(tesseract::ParagraphJustification *justification,
|
||||
bool *is_list_item, bool *is_crown,
|
||||
int *first_line_indent) const;
|
||||
|
||||
// If the current WERD_RES (it_->word()) is not nullptr, sets the BlamerBundle
|
||||
// of the current word to the given pointer (takes ownership of the pointer)
|
||||
// and returns true.
|
||||
// Can only be used when iterating on the word level.
|
||||
bool SetWordBlamerBundle(BlamerBundle *blamer_bundle);
|
||||
|
||||
protected:
|
||||
/**
|
||||
* Sets up the internal data for iterating the blobs of a new word, then
|
||||
* moves the iterator to the given offset.
|
||||
*/
|
||||
void BeginWord(int offset);
|
||||
|
||||
/** Pointer to the page_res owned by the API. */
|
||||
PAGE_RES *page_res_;
|
||||
/** Pointer to the Tesseract object owned by the API. */
|
||||
Tesseract *tesseract_;
|
||||
/**
|
||||
* The iterator to the page_res_. Owned by this ResultIterator.
|
||||
* A pointer just to avoid dragging in Tesseract includes.
|
||||
*/
|
||||
PAGE_RES_IT *it_;
|
||||
/**
|
||||
* The current input WERD being iterated. If there is an output from OCR,
|
||||
* then word_ is nullptr. Owned by the API
|
||||
*/
|
||||
WERD *word_;
|
||||
/** The length of the current word_. */
|
||||
int word_length_;
|
||||
/** The current blob index within the word. */
|
||||
int blob_index_;
|
||||
/**
|
||||
* Iterator to the blobs within the word. If nullptr, then we are iterating
|
||||
* OCR results in the box_word.
|
||||
* Owned by this ResultIterator.
|
||||
*/
|
||||
C_BLOB_IT *cblob_it_;
|
||||
/** Control over what to include in bounding boxes. */
|
||||
bool include_upper_dots_;
|
||||
bool include_lower_dots_;
|
||||
/** Parameters saved from the Thresholder. Needed to rebuild coordinates.*/
|
||||
int scale_;
|
||||
int scaled_yres_;
|
||||
int rect_left_;
|
||||
int rect_top_;
|
||||
int rect_width_;
|
||||
int rect_height_;
|
||||
};
|
||||
|
||||
} // namespace tesseract.
|
||||
|
||||
#endif // TESSERACT_CCMAIN_PAGEITERATOR_H_
|
|
@ -1,281 +0,0 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: publictypes.h
|
||||
// Description: Types used in both the API and internally
|
||||
// Author: Ray Smith
|
||||
//
|
||||
// (C) Copyright 2010, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef TESSERACT_CCSTRUCT_PUBLICTYPES_H_
|
||||
#define TESSERACT_CCSTRUCT_PUBLICTYPES_H_
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// This file contains types that are used both by the API and internally
|
||||
// to Tesseract. In order to decouple the API from Tesseract and prevent cyclic
|
||||
// dependencies, THIS FILE SHOULD NOT DEPEND ON ANY OTHER PART OF TESSERACT.
|
||||
// Restated: It is OK for low-level Tesseract files to include publictypes.h,
|
||||
// but not for the low-level tesseract code to include top-level API code.
|
||||
// This file should not use other Tesseract types, as that would drag
|
||||
// their includes into the API-level.
|
||||
|
||||
/** Printers' points per inch; the unit in which point sizes are returned. */
constexpr int kPointsPerInch = 72;

/**
 * Lowest believable resolution. Serves as the default when nothing else is
 * known, because under-estimating is safer than over-estimating.
 */
constexpr int kMinCredibleResolution = 70;

/** Highest believable resolution. */
constexpr int kMaxCredibleResolution = 2400;

/**
 * Ratio between the median blob size and the likely resolution, used to
 * estimate the resolution when none is provided. This is basically
 * 1 / (usual text size in inches).
 */
constexpr int kResolutionEstimationFactor = 10;
|
||||
|
||||
/**
|
||||
* Possible types for a POLY_BLOCK or ColPartition.
|
||||
* Must be kept in sync with kPBColors in polyblk.cpp and PTIs*Type functions
|
||||
* below, as well as kPolyBlockNames in layout_test.cc.
|
||||
* Used extensively by ColPartition, and POLY_BLOCK.
|
||||
*/
|
||||
enum PolyBlockType {
  PT_UNKNOWN,          // Type is not yet known. Keep as the first element.
  PT_FLOWING_TEXT,     // Text that lives inside a column.
  PT_HEADING_TEXT,     // Text that spans more than one column.
  PT_PULLOUT_TEXT,     // Text that is in a cross-column pull-out region.
  PT_EQUATION,         // Partition belonging to an equation region.
  PT_INLINE_EQUATION,  // Partition has inline equation.
  PT_TABLE,            // Partition belonging to a table region.
  PT_VERTICAL_TEXT,    // Text-line runs vertically.
  PT_CAPTION_TEXT,     // Text that belongs to an image.
  PT_FLOWING_IMAGE,    // Image that lives inside a column.
  PT_HEADING_IMAGE,    // Image that spans more than one column.
  PT_PULLOUT_IMAGE,    // Image that is in a cross-column pull-out region.
  PT_HORZ_LINE,        // Horizontal Line.
  PT_VERT_LINE,        // Vertical Line.
  PT_NOISE,            // Lies outside of any column.
  PT_COUNT
};

/** Returns true if the type is a line, either horizontal or vertical. */
inline bool PTIsLineType(PolyBlockType type) {
  switch (type) {
    case PT_HORZ_LINE:
    case PT_VERT_LINE:
      return true;
    default:
      return false;
  }
}

/** Returns true if the type is one of the image types. */
inline bool PTIsImageType(PolyBlockType type) {
  switch (type) {
    case PT_FLOWING_IMAGE:
    case PT_HEADING_IMAGE:
    case PT_PULLOUT_IMAGE:
      return true;
    default:
      return false;
  }
}

/**
 * Returns true if the type is one of the text types. Tables and inline
 * equations count as text; PT_EQUATION does not.
 */
inline bool PTIsTextType(PolyBlockType type) {
  switch (type) {
    case PT_FLOWING_TEXT:
    case PT_HEADING_TEXT:
    case PT_PULLOUT_TEXT:
    case PT_TABLE:
    case PT_VERTICAL_TEXT:
    case PT_CAPTION_TEXT:
    case PT_INLINE_EQUATION:
      return true;
    default:
      return false;
  }
}

/** Returns true if the type is a pullout (inter-column) image or text. */
inline bool PTIsPulloutType(PolyBlockType type) {
  switch (type) {
    case PT_PULLOUT_IMAGE:
    case PT_PULLOUT_TEXT:
      return true;
    default:
      return false;
  }
}
|
||||
|
||||
/**
|
||||
* +------------------+ Orientation Example:
|
||||
* | 1 Aaaa Aaaa Aaaa | ====================
|
||||
* | Aaa aa aaa aa | To left is a diagram of some (1) English and
|
||||
* | aaaaaa A aa aaa. | (2) Chinese text and a (3) photo credit.
|
||||
* | 2 |
|
||||
* | ####### c c C | Upright Latin characters are represented as A and a.
|
||||
* | ####### c c c | '<' represents a latin character rotated
|
||||
* | < ####### c c c | anti-clockwise 90 degrees.
|
||||
* | < ####### c c |
|
||||
* | < ####### . c | Upright Chinese characters are represented C and c.
|
||||
* | 3 ####### c |
|
||||
* +------------------+ NOTA BENE: enum values here should match goodoc.proto
|
||||
|
||||
* If you orient your head so that "up" aligns with Orientation,
|
||||
* then the characters will appear "right side up" and readable.
|
||||
*
|
||||
* In the example above, both the English and Chinese paragraphs are oriented
|
||||
* so their "up" is the top of the page (page up). The photo credit is read
|
||||
* with one's head turned leftward ("up" is to page left).
|
||||
*
|
||||
* The values of this enum match the convention of Tesseract's osdetect.h
|
||||
*/
|
||||
// Each value names the page side that the text's "up" points toward.
enum Orientation {
  ORIENTATION_PAGE_UP = 0,     // "up" is the top of the page
  ORIENTATION_PAGE_RIGHT = 1,  // "up" is to page right
  ORIENTATION_PAGE_DOWN = 2,   // "up" is the bottom of the page
  ORIENTATION_PAGE_LEFT = 3,   // "up" is to page left
};
|
||||
|
||||
/**
|
||||
* The grapheme clusters within a line of text are laid out logically
|
||||
* in this direction, judged when looking at the text line rotated so that
|
||||
* its Orientation is "page up".
|
||||
*
|
||||
* For English text, the writing direction is left-to-right. For the
|
||||
* Chinese text in the above example, the writing direction is top-to-bottom.
|
||||
*/
|
||||
enum WritingDirection {
  // Direction is judged with the text line rotated so that its
  // Orientation is "page up".
  WRITING_DIRECTION_LEFT_TO_RIGHT = 0,  // e.g. English text
  WRITING_DIRECTION_RIGHT_TO_LEFT = 1,
  WRITING_DIRECTION_TOP_TO_BOTTOM = 2,  // e.g. the Chinese text in the example
};
|
||||
|
||||
/**
|
||||
* The text lines are read in the given sequence.
|
||||
*
|
||||
* In English, the order is top-to-bottom.
|
||||
* In Chinese, vertical text lines are read right-to-left. Mongolian is
|
||||
* written in vertical columns top to bottom like Chinese, but the lines
|
||||
* are ordered left-to-right.
|
||||
*
|
||||
* Note that only some combinations make sense. For example,
|
||||
* WRITING_DIRECTION_LEFT_TO_RIGHT implies TEXTLINE_ORDER_TOP_TO_BOTTOM
|
||||
*/
|
||||
enum TextlineOrder {
  // Sequence in which successive text lines are read.
  TEXTLINE_ORDER_LEFT_TO_RIGHT = 0,  // e.g. Mongolian vertical columns
  TEXTLINE_ORDER_RIGHT_TO_LEFT = 1,  // e.g. Chinese vertical text lines
  TEXTLINE_ORDER_TOP_TO_BOTTOM = 2,  // e.g. English
};
|
||||
|
||||
/**
|
||||
* Possible modes for page layout analysis. These *must* be kept in order
|
||||
* of decreasing amount of layout analysis to be done, except for OSD_ONLY,
|
||||
* so that the inequality-based inline test functions below work.
|
||||
*/
|
||||
enum PageSegMode {
  /// Orientation and script detection only.
  PSM_OSD_ONLY = 0,
  /// Automatic page segmentation with orientation and script
  /// detection. (OSD)
  PSM_AUTO_OSD = 1,
  /// Automatic page segmentation, but no OSD, or OCR.
  PSM_AUTO_ONLY = 2,
  /// Fully automatic page segmentation, but no OSD.
  PSM_AUTO = 3,
  /// Assume a single column of text of variable sizes.
  PSM_SINGLE_COLUMN = 4,
  /// Assume a single uniform block of vertically aligned text.
  PSM_SINGLE_BLOCK_VERT_TEXT = 5,
  /// Assume a single uniform block of text. (Default.)
  PSM_SINGLE_BLOCK = 6,
  /// Treat the image as a single text line.
  PSM_SINGLE_LINE = 7,
  /// Treat the image as a single word.
  PSM_SINGLE_WORD = 8,
  /// Treat the image as a single word in a circle.
  PSM_CIRCLE_WORD = 9,
  /// Treat the image as a single character.
  PSM_SINGLE_CHAR = 10,
  /// Find as much text as possible in no particular order.
  PSM_SPARSE_TEXT = 11,
  /// Sparse text with orientation and script det.
  PSM_SPARSE_TEXT_OSD = 12,
  /// Treat the image as a single text line, bypassing hacks that are
  /// Tesseract-specific.
  PSM_RAW_LINE = 13,

  /// Number of enum entries.
  PSM_COUNT
};

/**
 * Predicates over a PageSegMode value that report which stages of layout
 * analysis are switched on.
 * *They rely on the numeric ordering of the PageSegMode enumerators.*
 * The argument is a plain int for compatibility with INT_PARAM.
 */

// OSD runs for the modes up to and including PSM_AUTO_OSD, and for the
// sparse-text-with-OSD mode.
inline bool PSM_OSD_ENABLED(int pageseg_mode) {
  if (pageseg_mode == PSM_SPARSE_TEXT_OSD) {
    return true;
  }
  return pageseg_mode <= PSM_AUTO_OSD;
}

// Orientation handling is on for modes up to and including PSM_AUTO, and
// for the sparse-text-with-OSD mode.
inline bool PSM_ORIENTATION_ENABLED(int pageseg_mode) {
  if (pageseg_mode == PSM_SPARSE_TEXT_OSD) {
    return true;
  }
  return pageseg_mode <= PSM_AUTO;
}

// Column finding is on for the range [PSM_AUTO_OSD, PSM_AUTO].
inline bool PSM_COL_FIND_ENABLED(int pageseg_mode) {
  if (pageseg_mode < PSM_AUTO_OSD) {
    return false;
  }
  return pageseg_mode <= PSM_AUTO;
}

// True for either of the two sparse-text modes.
inline bool PSM_SPARSE(int pageseg_mode) {
  if (pageseg_mode == PSM_SPARSE_TEXT_OSD) {
    return true;
  }
  return pageseg_mode == PSM_SPARSE_TEXT;
}

// Block finding is on for the range [PSM_AUTO_OSD, PSM_SINGLE_COLUMN].
inline bool PSM_BLOCK_FIND_ENABLED(int pageseg_mode) {
  if (pageseg_mode < PSM_AUTO_OSD) {
    return false;
  }
  return pageseg_mode <= PSM_SINGLE_COLUMN;
}

// Line finding is on for the range [PSM_AUTO_OSD, PSM_SINGLE_BLOCK].
inline bool PSM_LINE_FIND_ENABLED(int pageseg_mode) {
  if (pageseg_mode < PSM_AUTO_OSD) {
    return false;
  }
  return pageseg_mode <= PSM_SINGLE_BLOCK;
}

// Word finding is on for the range [PSM_AUTO_OSD, PSM_SINGLE_LINE] and for
// both sparse-text modes.
inline bool PSM_WORD_FIND_ENABLED(int pageseg_mode) {
  if (pageseg_mode == PSM_SPARSE_TEXT || pageseg_mode == PSM_SPARSE_TEXT_OSD) {
    return true;
  }
  const bool at_or_above_auto_osd = pageseg_mode >= PSM_AUTO_OSD;
  const bool at_or_below_single_line = pageseg_mode <= PSM_SINGLE_LINE;
  return at_or_above_auto_osd && at_or_below_single_line;
}
|
||||
|
||||
/**
|
||||
* enum of the elements of the page hierarchy, used in ResultIterator
|
||||
* to provide functions that operate on each level without having to
|
||||
* have 5x as many functions.
|
||||
*/
|
||||
enum PageIteratorLevel {
  // Levels run from the coarsest element of the page hierarchy to the
  // finest; the numbering is the natural 0-based sequence.
  RIL_BLOCK = 0,     // Block of text/image/separator line.
  RIL_PARA = 1,      // Paragraph within a block.
  RIL_TEXTLINE = 2,  // Line within a paragraph.
  RIL_WORD = 3,      // Word within a textline.
  RIL_SYMBOL = 4     // Symbol/character within a word.
};
|
||||
|
||||
/**
|
||||
* JUSTIFICATION_UNKNOWN
|
||||
* The alignment is not clearly one of the other options. This could happen
|
||||
* for example if there are only one or two lines of text or the text looks
|
||||
* like source code or poetry.
|
||||
*
|
||||
* NOTA BENE: Fully justified paragraphs (text aligned to both left and right
|
||||
* margins) are marked by Tesseract with JUSTIFICATION_LEFT if their text
|
||||
* is written with a left-to-right script and with JUSTIFICATION_RIGHT if
|
||||
* their text is written in a right-to-left script.
|
||||
*
|
||||
* Interpretation for text read in vertical lines:
|
||||
* "Left" is wherever the starting reading position is.
|
||||
*
|
||||
* JUSTIFICATION_LEFT
|
||||
* Each line, except possibly the first, is flush to the same left tab stop.
|
||||
*
|
||||
* JUSTIFICATION_CENTER
|
||||
* The text lines of the paragraph are centered about a line going
|
||||
* down through the middle of the text lines.
|
||||
*
|
||||
* JUSTIFICATION_RIGHT
|
||||
* Each line, except possibly the first, is flush to the same right tab stop.
|
||||
*/
|
||||
enum ParagraphJustification {
  JUSTIFICATION_UNKNOWN = 0,  // alignment is not clearly one of the others
  JUSTIFICATION_LEFT = 1,     // lines flush to the same left tab stop
  JUSTIFICATION_CENTER = 2,   // lines centered about a common vertical line
  JUSTIFICATION_RIGHT = 3,    // lines flush to the same right tab stop
};
|
||||
|
||||
/**
|
||||
* When Tesseract/Cube is initialized we can choose to instantiate/load/run
|
||||
* only the Tesseract part, only the Cube part or both along with the combiner.
|
||||
* The preference of which engine to use is stored in tessedit_ocr_engine_mode.
|
||||
*
|
||||
* ATTENTION: When modifying this enum, please make sure to make the
|
||||
* appropriate changes to all the enums mirroring it (e.g. OCREngine in
|
||||
* cityblock/workflow/detection/detection_storage.proto). Such enums will
|
||||
* mention the connection to OcrEngineMode in the comments.
|
||||
*/
|
||||
enum OcrEngineMode {
  // Run Tesseract only - fastest; deprecated
  OEM_TESSERACT_ONLY = 0,

  // Run just the LSTM line recognizer.
  OEM_LSTM_ONLY = 1,

  // Run the LSTM recognizer, but allow fallback to Tesseract when things
  // get difficult. deprecated
  OEM_TESSERACT_LSTM_COMBINED = 2,

  // Specify this mode when calling init_*(), to indicate that any of the
  // above modes should be automatically inferred from the variables in the
  // language-specific config, command-line configs, or if not specified in
  // any of the above should be set to the default OEM_TESSERACT_ONLY.
  OEM_DEFAULT = 3,

  // Number of OEMs
  OEM_COUNT
};
|
||||
|
||||
} // namespace tesseract.
|
||||
|
||||
#endif // TESSERACT_CCSTRUCT_PUBLICTYPES_H_
|
|
@ -1,311 +0,0 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: renderer.h
|
||||
// Description: Rendering interface to inject into TessBaseAPI
|
||||
//
|
||||
// (C) Copyright 2011, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef TESSERACT_API_RENDERER_H_
|
||||
#define TESSERACT_API_RENDERER_H_
|
||||
|
||||
#include "export.h"
|
||||
|
||||
// To avoid collision with other typenames include the ABSOLUTE MINIMUM
|
||||
// complexity of includes here. Use forward declarations wherever possible
|
||||
// and hide includes of complex types in baseapi.cpp.
|
||||
#include <cstdint>
|
||||
#include <string> // for std::string
|
||||
#include <vector> // for std::vector
|
||||
|
||||
struct Pix;
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
class TessBaseAPI;
|
||||
|
||||
/**
|
||||
* Interface for rendering tesseract results into a document, such as text,
|
||||
* HOCR or pdf. This class is abstract. Specific classes handle individual
|
||||
* formats. This interface is then used to inject the renderer class into
|
||||
* tesseract when processing images.
|
||||
*
|
||||
* For simplicity implementing this with tesseract version 3.01,
|
||||
* the renderer contains document state that is cleared from document
|
||||
* to document just as the TessBaseAPI is. This way the base API can just
|
||||
* delegate its rendering functionality to injected renderers, and the
|
||||
* renderers can manage the associated state needed for the specific formats
|
||||
* in addition to the heuristics for producing it.
|
||||
*/
|
||||
class TESS_API TessResultRenderer {
|
||||
public:
|
||||
virtual ~TessResultRenderer();
|
||||
|
||||
// Takes ownership of pointer so must be new'd instance.
|
||||
// Renderers aren't ordered, but appends the sequences of next parameter
|
||||
// and existing next(). The renderers should be unique across both lists.
|
||||
void insert(TessResultRenderer *next);
|
||||
|
||||
// Returns the next renderer or nullptr.
|
||||
TessResultRenderer *next() {
|
||||
return next_;
|
||||
}
|
||||
|
||||
/**
|
||||
* Starts a new document with the given title.
|
||||
* This clears the contents of the output data.
|
||||
* Title should use UTF-8 encoding.
|
||||
*/
|
||||
bool BeginDocument(const char *title);
|
||||
|
||||
/**
|
||||
* Adds the recognized text from the source image to the current document.
|
||||
* Invalid if BeginDocument not yet called.
|
||||
*
|
||||
* Note that this API is a bit weird but is designed to fit into the
|
||||
* current TessBaseAPI implementation where the api has lots of state
|
||||
* information that we might want to add in.
|
||||
*/
|
||||
bool AddImage(TessBaseAPI *api);
|
||||
|
||||
/**
|
||||
* Finishes the document and finalizes the output data
|
||||
* Invalid if BeginDocument not yet called.
|
||||
*/
|
||||
bool EndDocument();
|
||||
|
||||
const char *file_extension() const {
|
||||
return file_extension_;
|
||||
}
|
||||
const char *title() const {
|
||||
return title_.c_str();
|
||||
}
|
||||
|
||||
// Is everything fine? Otherwise something went wrong.
|
||||
bool happy() const {
|
||||
return happy_;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the index of the last image given to AddImage
|
||||
* (i.e. images are incremented whether the image succeeded or not)
|
||||
*
|
||||
* This is always defined. It means either the number of the
|
||||
* current image, the last image ended, or in the completed document
|
||||
* depending on when in the document lifecycle you are looking at it.
|
||||
* Will return -1 if a document was never started.
|
||||
*/
|
||||
int imagenum() const {
|
||||
return imagenum_;
|
||||
}
|
||||
|
||||
protected:
|
||||
/**
|
||||
* Called by concrete classes.
|
||||
*
|
||||
* outputbase is the name of the output file excluding
|
||||
* extension. For example, "/path/to/chocolate-chip-cookie-recipe"
|
||||
*
|
||||
* extension indicates the file extension to be used for output
|
||||
* files. For example "pdf" will produce a .pdf file, and "hocr"
|
||||
* will produce .hocr files.
|
||||
*/
|
||||
TessResultRenderer(const char *outputbase, const char *extension);
|
||||
|
||||
// Hook for specialized handling in BeginDocument()
|
||||
virtual bool BeginDocumentHandler();
|
||||
|
||||
// This must be overridden to render the OCR'd results
|
||||
virtual bool AddImageHandler(TessBaseAPI *api) = 0;
|
||||
|
||||
// Hook for specialized handling in EndDocument()
|
||||
virtual bool EndDocumentHandler();
|
||||
|
||||
// Renderers can call this to append '\0' terminated strings into
|
||||
// the output string returned by GetOutput.
|
||||
// This method will grow the output buffer if needed.
|
||||
void AppendString(const char *s);
|
||||
|
||||
// Renderers can call this to append binary byte sequences into
|
||||
// the output string returned by GetOutput. Note that s is not necessarily
|
||||
// '\0' terminated (and can contain '\0' within it).
|
||||
// This method will grow the output buffer if needed.
|
||||
void AppendData(const char *s, int len);
|
||||
|
||||
private:
|
||||
TessResultRenderer *next_; // Can link multiple renderers together
|
||||
FILE *fout_; // output file pointer
|
||||
const char *file_extension_; // standard extension for generated output
|
||||
std::string title_; // title of document being rendered
|
||||
int imagenum_; // index of last image added
|
||||
bool happy_; // I get grumpy when the disk fills up, etc.
|
||||
};
|
||||
|
||||
/**
|
||||
* Renders tesseract output into a plain UTF-8 text string
|
||||
*/
|
||||
class TESS_API TessTextRenderer : public TessResultRenderer {
public:
  // `outputbase` is the output file name without extension.
  explicit TessTextRenderer(const char *outputbase);

protected:
  // Per-image rendering step required by TessResultRenderer.
  bool AddImageHandler(TessBaseAPI *api) override;
};
|
||||
|
||||
/**
|
||||
* Renders tesseract output into an hocr text string
|
||||
*/
|
||||
class TESS_API TessHOcrRenderer : public TessResultRenderer {
public:
  // `font_info` selects whether font information is printed.
  explicit TessHOcrRenderer(const char *outputbase, bool font_info);
  explicit TessHOcrRenderer(const char *outputbase);

protected:
  // Overrides of the TessResultRenderer document/image hooks.
  bool BeginDocumentHandler() override;
  bool AddImageHandler(TessBaseAPI *api) override;
  bool EndDocumentHandler() override;

private:
  bool font_info_;  // whether to print font information
};
|
||||
|
||||
/**
|
||||
* Renders tesseract output into an alto text string
|
||||
*/
|
||||
class TESS_API TessAltoRenderer : public TessResultRenderer {
public:
  // `outputbase` is the output file name without extension.
  explicit TessAltoRenderer(const char *outputbase);

protected:
  // Overrides of the TessResultRenderer document/image hooks.
  bool BeginDocumentHandler() override;
  bool AddImageHandler(TessBaseAPI *api) override;
  bool EndDocumentHandler() override;

private:
  // NOTE(review): presumably tracks whether the document start has been
  // handled; its use is in the implementation file - confirm there.
  bool begin_document;
};
|
||||
|
||||
/**
|
||||
* Renders Tesseract output into a TSV string
|
||||
*/
|
||||
class TESS_API TessTsvRenderer : public TessResultRenderer {
public:
  // `font_info` selects whether font information is printed.
  explicit TessTsvRenderer(const char *outputbase, bool font_info);
  explicit TessTsvRenderer(const char *outputbase);

protected:
  // Overrides of the TessResultRenderer document/image hooks.
  bool BeginDocumentHandler() override;
  bool AddImageHandler(TessBaseAPI *api) override;
  bool EndDocumentHandler() override;

private:
  bool font_info_;  // whether to print font information
};
|
||||
|
||||
/**
|
||||
* Renders tesseract output into searchable PDF
|
||||
*/
|
||||
class TESS_API TessPDFRenderer : public TessResultRenderer {
public:
  // `datadir` is the TESSDATA location; it is required because a custom
  // PDF font is loaded from there.
  TessPDFRenderer(const char *outputbase, const char *datadir,
                  bool textonly = false);

protected:
  // Overrides of the TessResultRenderer document/image hooks.
  bool BeginDocumentHandler() override;
  bool AddImageHandler(TessBaseAPI *api) override;
  bool EndDocumentHandler() override;

private:
  // Pages are produced one at a time so that not every image has to be
  // held in memory at once. The metadata below is recorded along the way
  // and used at the end to emit whatever cannot easily be streamed.
  long int obj_;                   // counter for PDF objects
  std::vector<uint64_t> offsets_;  // offset of every PDF object in bytes
  std::vector<long int> pages_;    // object number for every /Page object
  std::string datadir_;            // where to find the custom font
  bool textonly_;                  // skip images if set

  // Bookkeeping only; the caller emits the data itself
  // (DIY = Do It Yourself).
  void AppendPDFObjectDIY(size_t objectsize);

  // Bookkeeping plus emission of the data.
  void AppendPDFObject(const char *data);

  // Builds the /Contents object for an entire page.
  char *GetPDFTextObjects(TessBaseAPI *api, double width, double height);

  // Converts an image into a PDF object, transcoding only when necessary.
  static bool imageToPDFObj(Pix *pix, const char *filename, long int objnum,
                            char **pdf_object, long int *pdf_object_size,
                            int jpg_quality);
};
|
||||
|
||||
/**
|
||||
* Renders tesseract output into a text string in UNLV format
|
||||
*/
|
||||
class TESS_API TessUnlvRenderer : public TessResultRenderer {
public:
  // `outputbase` is the output file name without extension.
  explicit TessUnlvRenderer(const char *outputbase);

protected:
  // Per-image rendering step required by TessResultRenderer.
  bool AddImageHandler(TessBaseAPI *api) override;
};
|
||||
|
||||
/**
|
||||
* Renders tesseract output into a plain UTF-8 text string for LSTMBox
|
||||
*/
|
||||
class TESS_API TessLSTMBoxRenderer : public TessResultRenderer {
public:
  // `outputbase` is the output file name without extension.
  explicit TessLSTMBoxRenderer(const char *outputbase);

protected:
  // Per-image rendering step required by TessResultRenderer.
  bool AddImageHandler(TessBaseAPI *api) override;
};
|
||||
|
||||
/**
|
||||
* Renders tesseract output into a plain UTF-8 text string in box-file format
|
||||
*/
|
||||
class TESS_API TessBoxTextRenderer : public TessResultRenderer {
public:
  // `outputbase` is the output file name without extension.
  explicit TessBoxTextRenderer(const char *outputbase);

protected:
  // Per-image rendering step required by TessResultRenderer.
  bool AddImageHandler(TessBaseAPI *api) override;
};
|
||||
|
||||
/**
|
||||
* Renders tesseract output into a plain UTF-8 text string in WordStr format
|
||||
*/
|
||||
class TESS_API TessWordStrBoxRenderer : public TessResultRenderer {
public:
  // `outputbase` is the output file name without extension.
  explicit TessWordStrBoxRenderer(const char *outputbase);

protected:
  // Per-image rendering step required by TessResultRenderer.
  bool AddImageHandler(TessBaseAPI *api) override;
};
|
||||
|
||||
#ifndef DISABLED_LEGACY_ENGINE
|
||||
|
||||
/**
|
||||
* Renders tesseract output into an osd text string
|
||||
*/
|
||||
class TESS_API TessOsdRenderer : public TessResultRenderer {
public:
  // `outputbase` is the output file name without extension.
  explicit TessOsdRenderer(const char *outputbase);

protected:
  // Per-image rendering step required by TessResultRenderer.
  bool AddImageHandler(TessBaseAPI *api) override;
};
|
||||
|
||||
#endif // ndef DISABLED_LEGACY_ENGINE
|
||||
|
||||
} // namespace tesseract.
|
||||
|
||||
#endif // TESSERACT_API_RENDERER_H_
|
|
@ -1,250 +0,0 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: resultiterator.h
|
||||
// Description: Iterator for tesseract results that is capable of
|
||||
// iterating in proper reading order over Bi Directional
|
||||
// (e.g. mixed Hebrew and English) text.
|
||||
// Author: David Eger
|
||||
//
|
||||
// (C) Copyright 2011, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef TESSERACT_CCMAIN_RESULT_ITERATOR_H_
|
||||
#define TESSERACT_CCMAIN_RESULT_ITERATOR_H_
|
||||
|
||||
#include "export.h" // for TESS_API, TESS_LOCAL
|
||||
#include "ltrresultiterator.h" // for LTRResultIterator
|
||||
#include "publictypes.h" // for PageIteratorLevel
|
||||
#include "unichar.h" // for StrongScriptDirection
|
||||
|
||||
#include <set> // for std::pair
|
||||
#include <vector> // for std::vector
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
class TESS_API ResultIterator : public LTRResultIterator {
public:
  static ResultIterator *StartOfParagraph(const LTRResultIterator &resit);

  /// ResultIterator is copy constructible; the compiler-generated copy
  /// constructor is sufficient.
  ~ResultIterator() override = default;

  // ---------------- Moving around within the page ----------------

  /// Positions the iterator at the start of the page, ready to begin an
  /// iteration.
  void Begin() override;

  /// Advances to the start of the next object at `level` of the page
  /// hierarchy, following the appropriate reading order. Returns false if
  /// the end of the page was reached.
  ///
  /// RIL_SYMBOL skips non-text blocks; every other PageIteratorLevel value
  /// visits each non-text block once. A non-text block can be thought of as
  /// holding a single paragraph, with a single line, with a single
  /// imaginary word.
  ///
  /// Calls with different levels may be freely intermixed. Words in
  /// right-to-left scripts are iterated correctly provided the appropriate
  /// language has been loaded into Tesseract.
  bool Next(PageIteratorLevel level) override;

  /// Reports whether the iterator is at the *logical* beginning of the
  /// given level (as opposed to ResultIterator's left-to-right
  /// top-to-bottom order). Otherwise it behaves like
  /// PageIterator::IsAtBeginningOf(); see pageiterator.h for the full
  /// description.
  bool IsAtBeginningOf(PageIteratorLevel level) const override;

  /// BiDi-aware implementation of PageIterator's IsAtFinalElement.
  /// For example, IsAtFinalElement(RIL_PARA, RIL_WORD) reports whether the
  /// iterator points at the last word of a paragraph. See PageIterator for
  /// the full comment.
  bool IsAtFinalElement(PageIteratorLevel level,
                        PageIteratorLevel element) const override;

  // ---------------- Functions that refer to words only ----------------

  /// Number of blanks before the current word.
  int BlanksBeforeWord() const;

  // ---------------- Accessing data ----------------

  /// Null-terminated UTF-8 text of the current object at the given level.
  /// The caller frees the result with delete [].
  virtual char *GetUTF8Text(PageIteratorLevel level) const;

  /// LSTM choices for every LSTM timestep of the current word.
  virtual std::vector<std::vector<std::vector<std::pair<const char *, float>>>>
      *GetRawLSTMTimesteps() const;
  virtual std::vector<std::vector<std::pair<const char *, float>>>
      *GetBestLSTMSymbolChoices() const;

  /// Whether the dominant reading direction of the current paragraph is
  /// left-to-right (rather than right-to-left).
  bool ParagraphIsLtr() const;

  // ---------------- Exposed only for testing ----------------

  /// Produces the reading order for a set of words (given left-to-right)
  /// as a sequence of word indices interleaved with optional meta-marks.
  /// Meta-marks are passed as negative values:
  ///   kMinorRunStart  Start of minor direction text.
  ///   kMinorRunEnd    End of minor direction text.
  ///   kComplexWord    The next indexed word contains both left-to-right
  ///                   and right-to-left characters and was treated as
  ///                   neutral.
  ///
  /// Example: for five words in a text line, indexed [0,1,2,3,4] from the
  /// leftmost side of the line, all of these are believable reading orders:
  ///
  ///   Left-to-Right (in ltr paragraph):
  ///     { 0, 1, 2, 3, 4 }
  ///   Left-to-Right (in rtl paragraph):
  ///     { kMinorRunStart, 0, 1, 2, 3, 4, kMinorRunEnd }
  ///   Right-to-Left (in rtl paragraph):
  ///     { 4, 3, 2, 1, 0 }
  ///   Left-to-Right except for an RTL phrase in words 2, 3 in an ltr
  ///   paragraph:
  ///     { 0, 1, kMinorRunStart, 3, 2, kMinorRunEnd, 4 }
  static void CalculateTextlineOrder(
      bool paragraph_is_ltr,
      const std::vector<StrongScriptDirection> &word_dirs,
      std::vector<int> *reading_order);

  static const int kMinorRunStart;
  static const int kMinorRunEnd;
  static const int kComplexWord;

protected:
  /// The data behind the given iterator is presumed to outlive this object.
  /// NB: construction is restricted (not public) because it does something
  /// non-obvious: the new iterator is reset to the beginning of the
  /// paragraph rather than staying wherever `resit` pointed.
  explicit ResultIterator(const LTRResultIterator &resit);

private:
  /// Computes the dominant writing direction of the current paragraph.
  /// Members should normally read current_paragraph_is_ltr_ instead.
  bool CurrentParagraphIsLtr() const;

  /// Produces word indices, measured from resit->RestartRow() = index 0,
  /// giving the reading order of the words of a text line, given an
  /// iterator into the middle of that line.
  /// Besides non-negative word indices, these negative values may be
  /// inserted:
  ///   kMinorRunStart  Start of minor direction text.
  ///   kMinorRunEnd    End of minor direction text.
  ///   kComplexWord    The previous word contains both left-to-right and
  ///                   right-to-left characters and was treated as neutral.
  void CalculateTextlineOrder(bool paragraph_is_ltr,
                              const LTRResultIterator &resit,
                              std::vector<int> *indices) const;

  /// As the overload above, additionally filling in the caller's `ssd`
  /// when ssd != nullptr.
  void CalculateTextlineOrder(bool paragraph_is_ltr,
                              const LTRResultIterator &resit,
                              std::vector<StrongScriptDirection> *ssd,
                              std::vector<int> *indices) const;

  /// Index of the current word in a strict left-to-right reading of the row.
  int LTRWordIndex() const;

  /// For an iterator pointing at a word, yields the logical reading order
  /// of the word's blob indices.
  void CalculateBlobOrder(std::vector<int> *blob_indices) const;

  /// Precondition: current_paragraph_is_ltr_ is set.
  void MoveToLogicalStartOfTextline();

  /// Precondition: current_paragraph_is_ltr_ and in_minor_direction_ are
  /// set.
  void MoveToLogicalStartOfWord();

  /// Whether the iterator is at the final (reading order) symbol of the
  /// word.
  bool IsAtFinalSymbolOfWord() const;

  /// Whether the iterator is at the first (reading order) symbol of the
  /// word.
  bool IsAtFirstSymbolOfWord() const;

  /// Appends any extra marks that belong after this word when it is
  /// printed. Mostly these are Unicode BiDi control characters.
  void AppendSuffixMarks(std::string *text) const;

  /// Appends the current word, in reading order, to the given buffer.
  void AppendUTF8WordText(std::string *text) const;

  /// Appends the text of the current text line, *assuming the iterator is
  /// positioned at the beginning of that line*, and moves the iterator to
  /// the first position past the line.
  /// Each text line ends in a single newline character; a text line that
  /// ends a paragraph gets a second terminal newline.
  void IterateAndAppendUTF8TextlineText(std::string *text);

  /// Appends the text of the current paragraph, in reading order, to the
  /// given buffer. Each text line ends in a single newline character, and
  /// the paragraph gets one extra newline at the end.
  void AppendUTF8ParagraphText(std::string *text) const;

  /// Whether the bidi_debug flag is set to at least `min_level`.
  bool BidiDebug(int min_level) const;

  bool current_paragraph_is_ltr_;

  /// Is the currently pointed-at character at the beginning of a
  /// minor-direction run?
  bool at_beginning_of_minor_run_;

  /// Is the currently pointed-at character in a minor-direction sequence?
  bool in_minor_direction_;

  /// Whether detected inter-word spaces are preserved, or "compressed" to a
  /// single space character (the default behavior).
  bool preserve_interword_spaces_;
};
|
||||
|
||||
} // namespace tesseract.
|
||||
|
||||
#endif // TESSERACT_CCMAIN_RESULT_ITERATOR_H_
|
|
@ -1,174 +0,0 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: unichar.h
|
||||
// Description: Unicode character/ligature class.
|
||||
// Author: Ray Smith
|
||||
//
|
||||
// (C) Copyright 2006, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef TESSERACT_CCUTIL_UNICHAR_H_
|
||||
#define TESSERACT_CCUTIL_UNICHAR_H_
|
||||
|
||||
#include "export.h"
|
||||
|
||||
#include <memory.h>
|
||||
#include <cstring>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Maximum number of characters that can be stored in a UNICHAR. Must be
|
||||
// at least 4. Must not exceed 31 without changing the coding of length.
|
||||
#define UNICHAR_LEN 30

// Unique identifier of a unichar.
typedef int UNICHAR_ID;

// Marker value for an invalid or uninitialized unichar id.
static const int INVALID_UNICHAR_ID(-1);

// Special unichar string that pairs with INVALID_UNICHAR_ID.
static const char INVALID_UNICHAR[] = "__INVALID_UNICHAR__";
|
||||
|
||||
enum StrongScriptDirection {
  // Text contains only neutral characters.
  DIR_NEUTRAL = 0,
  // Text contains no Right-to-Left characters.
  DIR_LEFT_TO_RIGHT = 1,
  // Text contains no Left-to-Right characters.
  DIR_RIGHT_TO_LEFT = 2,
  // Text contains a mixture of left-to-right and right-to-left characters.
  DIR_MIX = 3,
};
|
||||
|
||||
// Signed integer type aliased as char32.
typedef signed int char32;
|
||||
|
||||
// The UNICHAR class holds a single classification result. This may be
|
||||
// a single Unicode character (stored as between 1 and 4 utf8 bytes) or
|
||||
// multiple Unicode characters representing the NFKC expansion of a ligature
|
||||
// such as fi, ffl etc. These are also stored as utf8.
|
||||
class TESS_API UNICHAR {
|
||||
public:
|
||||
UNICHAR() {
|
||||
memset(chars, 0, UNICHAR_LEN);
|
||||
}
|
||||
|
||||
// Construct from a utf8 string. If len<0 then the string is null terminated.
|
||||
// If the string is too long to fit in the UNICHAR then it takes only what
|
||||
// will fit.
|
||||
UNICHAR(const char *utf8_str, int len);
|
||||
|
||||
// Construct from a single UCS4 character.
|
||||
explicit UNICHAR(int unicode);
|
||||
|
||||
// Default copy constructor and operator= are OK.
|
||||
|
||||
// Get the first character as UCS-4.
|
||||
int first_uni() const;
|
||||
|
||||
// Get the length of the UTF8 string.
|
||||
int utf8_len() const {
|
||||
int len = chars[UNICHAR_LEN - 1];
|
||||
return len >= 0 && len < UNICHAR_LEN ? len : UNICHAR_LEN;
|
||||
}
|
||||
|
||||
// Get a UTF8 string, but NOT nullptr terminated.
|
||||
const char *utf8() const {
|
||||
return chars;
|
||||
}
|
||||
|
||||
// Get a terminated UTF8 string: Must delete[] it after use.
|
||||
char *utf8_str() const;
|
||||
|
||||
// Get the number of bytes in the first character of the given utf8 string.
|
||||
static int utf8_step(const char *utf8_str);
|
||||
|
||||
// A class to simplify iterating over and accessing elements of a UTF8
|
||||
// string. Note that unlike the UNICHAR class, const_iterator does NOT COPY or
|
||||
// take ownership of the underlying byte array. It also does not permit
|
||||
// modification of the array (as the name suggests).
|
||||
//
|
||||
// Example:
|
||||
// for (UNICHAR::const_iterator it = UNICHAR::begin(str, str_len);
|
||||
// it != UNICHAR::end(str, len);
|
||||
// ++it) {
|
||||
// printf("UCS-4 symbol code = %d\n", *it);
|
||||
// char buf[5];
|
||||
// int char_len = it.get_utf8(buf); buf[char_len] = '\0';
|
||||
// printf("Char = %s\n", buf);
|
||||
// }
|
||||
class TESS_API const_iterator {
|
||||
using CI = const_iterator;
|
||||
|
||||
public:
|
||||
// Step to the next UTF8 character.
|
||||
// If the current position is at an illegal UTF8 character, then print an
|
||||
// error message and step by one byte. If the current position is at a
|
||||
// nullptr value, don't step past it.
|
||||
const_iterator &operator++();
|
||||
|
||||
// Return the UCS-4 value at the current position.
|
||||
// If the current position is at an illegal UTF8 value, return a single
|
||||
// space character.
|
||||
int operator*() const;
|
||||
|
||||
// Store the UTF-8 encoding of the current codepoint into buf, which must be
|
||||
// at least 4 bytes long. Return the number of bytes written.
|
||||
// If the current position is at an illegal UTF8 value, writes a single
|
||||
// space character and returns 1.
|
||||
// Note that this method does not null-terminate the buffer.
|
||||
int get_utf8(char *buf) const;
|
||||
// Returns the number of bytes of the current codepoint. Returns 1 if the
|
||||
// current position is at an illegal UTF8 value.
|
||||
int utf8_len() const;
|
||||
// Returns true if the UTF-8 encoding at the current position is legal.
|
||||
bool is_legal() const;
|
||||
|
||||
// Return the pointer into the string at the current position.
|
||||
const char *utf8_data() const {
|
||||
return it_;
|
||||
}
|
||||
|
||||
// Iterator equality operators.
|
||||
friend bool operator==(const CI &lhs, const CI &rhs) {
|
||||
return lhs.it_ == rhs.it_;
|
||||
}
|
||||
friend bool operator!=(const CI &lhs, const CI &rhs) {
|
||||
return !(lhs == rhs);
|
||||
}
|
||||
|
||||
private:
|
||||
friend class UNICHAR;
|
||||
explicit const_iterator(const char *it) : it_(it) {}
|
||||
|
||||
const char *it_; // Pointer into the string.
|
||||
};
|
||||
|
||||
// Create a start/end iterator pointing to a string. Note that these methods
|
||||
// are static and do NOT create a copy or take ownership of the underlying
|
||||
// array.
|
||||
static const_iterator begin(const char *utf8_str, int byte_length);
|
||||
static const_iterator end(const char *utf8_str, int byte_length);
|
||||
|
||||
// Converts a utf-8 string to a vector of unicodes.
|
||||
// Returns an empty vector if the input contains invalid UTF-8.
|
||||
static std::vector<char32> UTF8ToUTF32(const char *utf8_str);
|
||||
// Converts a vector of unicodes to a utf8 string.
|
||||
// Returns an empty string if the input contains an invalid unicode.
|
||||
static std::string UTF32ToUTF8(const std::vector<char32> &str32);
|
||||
|
||||
private:
|
||||
// A UTF-8 representation of 1 or more Unicode characters.
|
||||
// The last element (chars[UNICHAR_LEN - 1]) is a length if
|
||||
// its value < UNICHAR_LEN, otherwise it is a genuine character.
|
||||
char chars[UNICHAR_LEN]{};
|
||||
};
|
||||
|
||||
} // namespace tesseract
|
||||
|
||||
#endif // TESSERACT_CCUTIL_UNICHAR_H_
|
|
@ -1,34 +0,0 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: version.h
|
||||
// Description: Version information
|
||||
//
|
||||
// (C) Copyright 2018, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef TESSERACT_API_VERSION_H_
#define TESSERACT_API_VERSION_H_

// NOTE(review): the @...@ tokens below are unsubstituted placeholders; this
// looks like the configure-time template rather than a generated header.
// Confirm the build fills them in before any numeric version macro is expanded.

// clang-format off

#define TESSERACT_MAJOR_VERSION @GENERIC_MAJOR_VERSION@
#define TESSERACT_MINOR_VERSION @GENERIC_MINOR_VERSION@
#define TESSERACT_MICRO_VERSION @GENERIC_MICRO_VERSION@

// Single packed version number: major shifted left by 16, minor shifted left
// by 8, OR-ed together with micro.
#define TESSERACT_VERSION \
  (TESSERACT_MAJOR_VERSION << 16 | \
   TESSERACT_MINOR_VERSION <<  8 | \
   TESSERACT_MICRO_VERSION)

#define TESSERACT_VERSION_STR "@PACKAGE_VERSION@"

// clang-format on

#endif // TESSERACT_API_VERSION_H_
|
|
@ -1,812 +0,0 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: baseapi.h
|
||||
// Description: Simple API for calling tesseract.
|
||||
// Author: Ray Smith
|
||||
//
|
||||
// (C) Copyright 2006, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef TESSERACT_API_BASEAPI_H_
|
||||
#define TESSERACT_API_BASEAPI_H_
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
# include "config_auto.h" // DISABLED_LEGACY_ENGINE
|
||||
#endif
|
||||
|
||||
#include "export.h"
|
||||
#include "pageiterator.h"
|
||||
#include "publictypes.h"
|
||||
#include "resultiterator.h"
|
||||
#include "unichar.h"
|
||||
|
||||
#include "version.h"
|
||||
|
||||
#include <cstdio>
|
||||
#include <vector> // for std::vector
|
||||
|
||||
// Image, image-array and box-array types that appear only as pointers in the
// API below, so forward declarations are enough here.
struct Pix;
struct Pixa;
struct Boxa;
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
class PAGE_RES;
|
||||
class ParagraphModel;
|
||||
class BLOCK_LIST;
|
||||
class ETEXT_DESC;
|
||||
struct OSResults;
|
||||
class UNICHARSET;
|
||||
|
||||
class Dawg;
|
||||
class Dict;
|
||||
class EquationDetect;
|
||||
class PageIterator;
|
||||
class ImageThresholder;
|
||||
class LTRResultIterator;
|
||||
class ResultIterator;
|
||||
class MutableIterator;
|
||||
class TessResultRenderer;
|
||||
class Tesseract;
|
||||
|
||||
// Function to read a std::vector<char> from a whole file.
|
||||
// Returns false on failure.
|
||||
using FileReader = bool (*)(const char *filename, std::vector<char> *data);
|
||||
|
||||
using DictFunc = int (Dict::*)(void *, const UNICHARSET &, UNICHAR_ID,
|
||||
bool) const;
|
||||
using ProbabilityInContextFunc = double (Dict::*)(const char *, const char *,
|
||||
int, const char *, int);
|
||||
|
||||
/**
|
||||
* Base class for all tesseract APIs.
|
||||
* Specific classes can add ability to work on different inputs or produce
|
||||
* different outputs.
|
||||
* This class is mostly an interface layer on top of the Tesseract instance
|
||||
* class to hide the data types so that users of this class don't have to
|
||||
* include any other Tesseract headers.
|
||||
*/
|
||||
class TESS_API TessBaseAPI {
|
||||
public:
|
||||
TessBaseAPI();
|
||||
virtual ~TessBaseAPI();
|
||||
// Copy constructor and assignment operator are currently unsupported.
|
||||
TessBaseAPI(TessBaseAPI const &) = delete;
|
||||
TessBaseAPI &operator=(TessBaseAPI const &) = delete;
|
||||
|
||||
/**
|
||||
* Returns the version identifier as a static string. Do not delete.
|
||||
*/
|
||||
static const char *Version();
|
||||
|
||||
/**
|
||||
* If compiled with OpenCL AND an available OpenCL
|
||||
* device is deemed faster than serial code, then
|
||||
* "device" is populated with the cl_device_id
|
||||
* and returns sizeof(cl_device_id)
|
||||
* otherwise *device=nullptr and returns 0.
|
||||
*/
|
||||
static size_t getOpenCLDevice(void **device);
|
||||
|
||||
/**
|
||||
* Set the name of the input file. Needed for training and
|
||||
* reading a UNLV zone file, and for searchable PDF output.
|
||||
*/
|
||||
void SetInputName(const char *name);
|
||||
/**
|
||||
* These functions are required for searchable PDF output.
|
||||
* We need our hands on the input file so that we can include
|
||||
* it in the PDF without transcoding. If that is not possible,
|
||||
* we need the original image. Finally, resolution metadata
|
||||
* is stored in the PDF so we need that as well.
|
||||
*/
|
||||
const char *GetInputName();
|
||||
// Takes ownership of the input pix.
|
||||
void SetInputImage(Pix *pix);
|
||||
Pix *GetInputImage();
|
||||
int GetSourceYResolution();
|
||||
const char *GetDatapath();
|
||||
|
||||
/** Set the name of the bonus output files. Needed only for debugging. */
|
||||
void SetOutputName(const char *name);
|
||||
|
||||
/**
|
||||
* Set the value of an internal "parameter."
|
||||
* Supply the name of the parameter and the value as a string, just as
|
||||
* you would in a config file.
|
||||
* Returns false if the name lookup failed.
|
||||
* Eg SetVariable("tessedit_char_blacklist", "xyz"); to ignore x, y and z.
|
||||
* Or SetVariable("classify_bln_numeric_mode", "1"); to set numeric-only mode.
|
||||
* SetVariable may be used before Init, but settings will revert to
|
||||
* defaults on End().
|
||||
*
|
||||
* Note: Must be called after Init(). Only works for non-init variables
|
||||
* (init variables should be passed to Init()).
|
||||
*/
|
||||
bool SetVariable(const char *name, const char *value);
|
||||
bool SetDebugVariable(const char *name, const char *value);
|
||||
|
||||
/**
|
||||
* Returns true if the parameter was found among Tesseract parameters.
|
||||
* Fills in value with the value of the parameter.
|
||||
*/
|
||||
bool GetIntVariable(const char *name, int *value) const;
|
||||
bool GetBoolVariable(const char *name, bool *value) const;
|
||||
bool GetDoubleVariable(const char *name, double *value) const;
|
||||
|
||||
/**
|
||||
* Returns the pointer to the string that represents the value of the
|
||||
* parameter if it was found among Tesseract parameters.
|
||||
*/
|
||||
const char *GetStringVariable(const char *name) const;
|
||||
|
||||
#ifndef DISABLED_LEGACY_ENGINE
|
||||
|
||||
/**
|
||||
* Print Tesseract fonts table to the given file.
|
||||
*/
|
||||
void PrintFontsTable(FILE *fp) const;
|
||||
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Print Tesseract parameters to the given file.
|
||||
*/
|
||||
void PrintVariables(FILE *fp) const;
|
||||
|
||||
/**
|
||||
* Get value of named variable as a string, if it exists.
|
||||
*/
|
||||
bool GetVariableAsString(const char *name, std::string *val) const;
|
||||
|
||||
/**
|
||||
* Instances are now mostly thread-safe and totally independent,
|
||||
* but some global parameters remain. Basically it is safe to use multiple
|
||||
* TessBaseAPIs in different threads in parallel, UNLESS:
|
||||
* you use SetVariable on some of the Params in classify and textord.
|
||||
* If you do, then the effect will be to change it for all your instances.
|
||||
*
|
||||
* Start tesseract. Returns zero on success and -1 on failure.
|
||||
* NOTE that the only members that may be called before Init are those
|
||||
* listed above here in the class definition.
|
||||
*
|
||||
* The datapath must be the name of the tessdata directory.
|
||||
* The language is (usually) an ISO 639-3 string or nullptr will default to
|
||||
* eng. It is entirely safe (and eventually will be efficient too) to call
|
||||
* Init multiple times on the same instance to change language, or just
|
||||
* to reset the classifier.
|
||||
* The language may be a string of the form [~]<lang>[+[~]<lang>]* indicating
|
||||
* that multiple languages are to be loaded. Eg hin+eng will load Hindi and
|
||||
* English. Languages may specify internally that they want to be loaded
|
||||
* with one or more other languages, so the ~ sign is available to override
|
||||
* that. Eg if hin were set to load eng by default, then hin+~eng would force
|
||||
* loading only hin. The number of loaded languages is limited only by
|
||||
* memory, with the caveat that loading additional languages will impact
|
||||
* both speed and accuracy, as there is more work to do to decide on the
|
||||
* applicable language, and there is more chance of hallucinating incorrect
|
||||
* words.
|
||||
* WARNING: On changing languages, all Tesseract parameters are reset
|
||||
* back to their default values. (Which may vary between languages.)
|
||||
* If you have a rare need to set a Variable that controls
|
||||
* initialization for a second call to Init you should explicitly
|
||||
* call End() and then use SetVariable before Init. This is only a very
|
||||
* rare use case, since there are very few uses that require any parameters
|
||||
* to be set before Init.
|
||||
*
|
||||
* If set_only_non_debug_params is true, only params that do not contain
|
||||
* "debug" in the name will be set.
|
||||
*/
|
||||
int Init(const char *datapath, const char *language, OcrEngineMode mode,
|
||||
char **configs, int configs_size,
|
||||
const std::vector<std::string> *vars_vec,
|
||||
const std::vector<std::string> *vars_values,
|
||||
bool set_only_non_debug_params);
|
||||
int Init(const char *datapath, const char *language, OcrEngineMode oem) {
|
||||
return Init(datapath, language, oem, nullptr, 0, nullptr, nullptr, false);
|
||||
}
|
||||
int Init(const char *datapath, const char *language) {
|
||||
return Init(datapath, language, OEM_DEFAULT, nullptr, 0, nullptr, nullptr,
|
||||
false);
|
||||
}
|
||||
// In-memory version reads the traineddata file directly from the given
|
||||
// data[data_size] array, and/or reads data via a FileReader.
|
||||
int Init(const char *data, int data_size, const char *language,
|
||||
OcrEngineMode mode, char **configs, int configs_size,
|
||||
const std::vector<std::string> *vars_vec,
|
||||
const std::vector<std::string> *vars_values,
|
||||
bool set_only_non_debug_params, FileReader reader);
|
||||
|
||||
/**
|
||||
* Returns the languages string used in the last valid initialization.
|
||||
* If the last initialization specified "deu+hin" then that will be
|
||||
* returned. If hin loaded eng automatically as well, then that will
|
||||
* not be included in this list. To find the languages actually
|
||||
* loaded use GetLoadedLanguagesAsVector.
|
||||
* The returned string should NOT be deleted.
|
||||
*/
|
||||
const char *GetInitLanguagesAsString() const;
|
||||
|
||||
/**
|
||||
* Returns the loaded languages in the vector of std::string.
|
||||
* Includes all languages loaded by the last Init, including those loaded
|
||||
* as dependencies of other loaded languages.
|
||||
*/
|
||||
void GetLoadedLanguagesAsVector(std::vector<std::string> *langs) const;
|
||||
|
||||
/**
|
||||
* Returns the available languages in the sorted vector of std::string.
|
||||
*/
|
||||
void GetAvailableLanguagesAsVector(std::vector<std::string> *langs) const;
|
||||
|
||||
/**
|
||||
* Init only for page layout analysis. Use only for calls to SetImage and
|
||||
* AnalysePage. Calls that attempt recognition will generate an error.
|
||||
*/
|
||||
void InitForAnalysePage();
|
||||
|
||||
/**
|
||||
* Read a "config" file containing a set of param, value pairs.
|
||||
* Searches the standard places: tessdata/configs, tessdata/tessconfigs
|
||||
* and also accepts a relative or absolute path name.
|
||||
* Note: only non-init params will be set (init params are set by Init()).
|
||||
*/
|
||||
void ReadConfigFile(const char *filename);
|
||||
/** Same as above, but only set debug params from the given config file. */
|
||||
void ReadDebugConfigFile(const char *filename);
|
||||
|
||||
/**
|
||||
* Set the current page segmentation mode. Defaults to PSM_SINGLE_BLOCK.
|
||||
* The mode is stored as an IntParam so it can also be modified by
|
||||
* ReadConfigFile or SetVariable("tessedit_pageseg_mode", mode as string).
|
||||
*/
|
||||
void SetPageSegMode(PageSegMode mode);
|
||||
|
||||
/** Return the current page segmentation mode. */
|
||||
PageSegMode GetPageSegMode() const;
|
||||
|
||||
/**
|
||||
* Recognize a rectangle from an image and return the result as a string.
|
||||
* May be called many times for a single Init.
|
||||
* Currently has no error checking.
|
||||
* Greyscale of 8 and color of 24 or 32 bits per pixel may be given.
|
||||
* Palette color images will not work properly and must be converted to
|
||||
* 24 bit.
|
||||
* Binary images of 1 bit per pixel may also be given but they must be
|
||||
* byte packed with the MSB of the first byte being the first pixel, and a
|
||||
* 1 represents WHITE. For binary images set bytes_per_pixel=0.
|
||||
* The recognized text is returned as a char* which is coded
|
||||
* as UTF8 and must be freed with the delete [] operator.
|
||||
*
|
||||
* Note that TesseractRect is the simplified convenience interface.
|
||||
* For advanced uses, use SetImage, (optionally) SetRectangle, Recognize,
|
||||
* and one or more of the Get*Text functions below.
|
||||
*/
|
||||
char *TesseractRect(const unsigned char *imagedata, int bytes_per_pixel,
|
||||
int bytes_per_line, int left, int top, int width,
|
||||
int height);
|
||||
|
||||
/**
|
||||
* Call between pages or documents etc to free up memory and forget
|
||||
* adaptive data.
|
||||
*/
|
||||
void ClearAdaptiveClassifier();
|
||||
|
||||
/**
|
||||
* @defgroup AdvancedAPI Advanced API
|
||||
* The following methods break TesseractRect into pieces, so you can
|
||||
* get hold of the thresholded image, get the text in different formats,
|
||||
* get bounding boxes, confidences etc.
|
||||
*/
|
||||
/* @{ */
|
||||
|
||||
/**
|
||||
* Provide an image for Tesseract to recognize. Format is as
|
||||
* TesseractRect above. Copies the image buffer and converts to Pix.
|
||||
* SetImage clears all recognition results, and sets the rectangle to the
|
||||
* full image, so it may be followed immediately by a GetUTF8Text, and it
|
||||
* will automatically perform recognition.
|
||||
*/
|
||||
void SetImage(const unsigned char *imagedata, int width, int height,
|
||||
int bytes_per_pixel, int bytes_per_line);
|
||||
|
||||
/**
|
||||
* Provide an image for Tesseract to recognize. As with SetImage above,
|
||||
* Tesseract takes its own copy of the image, so it need not persist until
|
||||
* after Recognize.
|
||||
* Pix vs raw, which to use?
|
||||
* Use Pix where possible. Tesseract uses Pix as its internal representation
|
||||
* and it is therefore more efficient to provide a Pix directly.
|
||||
*/
|
||||
void SetImage(Pix *pix);
|
||||
|
||||
/**
|
||||
* Set the resolution of the source image in pixels per inch so font size
|
||||
* information can be calculated in results. Call this after SetImage().
|
||||
*/
|
||||
void SetSourceResolution(int ppi);
|
||||
|
||||
/**
|
||||
* Restrict recognition to a sub-rectangle of the image. Call after SetImage.
|
||||
* Each SetRectangle clears the recognition results so multiple rectangles
|
||||
* can be recognized with the same image.
|
||||
*/
|
||||
void SetRectangle(int left, int top, int width, int height);
|
||||
|
||||
/**
|
||||
* Get a copy of the internal thresholded image from Tesseract.
|
||||
* Caller takes ownership of the Pix and must pixDestroy it.
|
||||
* May be called any time after SetImage, or after TesseractRect.
|
||||
*/
|
||||
Pix *GetThresholdedImage();
|
||||
|
||||
/**
|
||||
* Get the result of page layout analysis as a leptonica-style
|
||||
* Boxa, Pixa pair, in reading order.
|
||||
* Can be called before or after Recognize.
|
||||
*/
|
||||
Boxa *GetRegions(Pixa **pixa);
|
||||
|
||||
/**
|
||||
* Get the textlines as a leptonica-style
|
||||
* Boxa, Pixa pair, in reading order.
|
||||
* Can be called before or after Recognize.
|
||||
* If raw_image is true, then extract from the original image instead of the
|
||||
* thresholded image and pad by raw_padding pixels.
|
||||
* If blockids is not nullptr, the block-id of each line is also returned as
|
||||
* an array of one element per line. delete [] after use. If paraids is not
|
||||
* nullptr, the paragraph-id of each line within its block is also returned as
|
||||
* an array of one element per line. delete [] after use.
|
||||
*/
|
||||
Boxa *GetTextlines(bool raw_image, int raw_padding, Pixa **pixa,
|
||||
int **blockids, int **paraids);
|
||||
/*
|
||||
Helper method to extract from the thresholded image. (most common usage)
|
||||
*/
|
||||
Boxa *GetTextlines(Pixa **pixa, int **blockids) {
|
||||
return GetTextlines(false, 0, pixa, blockids, nullptr);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get textlines and strips of image regions as a leptonica-style Boxa, Pixa
|
||||
* pair, in reading order. Enables downstream handling of non-rectangular
|
||||
* regions.
|
||||
* Can be called before or after Recognize.
|
||||
* If blockids is not nullptr, the block-id of each line is also returned as
|
||||
* an array of one element per line. delete [] after use.
|
||||
*/
|
||||
Boxa *GetStrips(Pixa **pixa, int **blockids);
|
||||
|
||||
/**
|
||||
* Get the words as a leptonica-style
|
||||
* Boxa, Pixa pair, in reading order.
|
||||
* Can be called before or after Recognize.
|
||||
*/
|
||||
Boxa *GetWords(Pixa **pixa);
|
||||
|
||||
/**
|
||||
* Gets the individual connected (text) components (created
|
||||
* after pages segmentation step, but before recognition)
|
||||
* as a leptonica-style Boxa, Pixa pair, in reading order.
|
||||
* Can be called before or after Recognize.
|
||||
* Note: the caller is responsible for calling boxaDestroy()
|
||||
* on the returned Boxa array and pixaDestroy() on cc array.
|
||||
*/
|
||||
Boxa *GetConnectedComponents(Pixa **cc);
|
||||
|
||||
/**
|
||||
* Get the given level kind of components (block, textline, word etc.) as a
|
||||
* leptonica-style Boxa, Pixa pair, in reading order.
|
||||
* Can be called before or after Recognize.
|
||||
* If blockids is not nullptr, the block-id of each component is also returned
|
||||
* as an array of one element per component. delete [] after use.
|
||||
* If paraids is not nullptr, the paragraph-id of each component within its
|
||||
* block is also returned as an array of one element per component. delete []
|
||||
* after use. If raw_image is true, then portions of the original image are
|
||||
* extracted instead of the thresholded image and padded with raw_padding. If
|
||||
* text_only is true, then only text components are returned.
|
||||
*/
|
||||
Boxa *GetComponentImages(PageIteratorLevel level, bool text_only,
|
||||
bool raw_image, int raw_padding, Pixa **pixa,
|
||||
int **blockids, int **paraids);
|
||||
// Helper function to get binary images with no padding (most common usage).
|
||||
Boxa *GetComponentImages(const PageIteratorLevel level, const bool text_only,
|
||||
Pixa **pixa, int **blockids) {
|
||||
return GetComponentImages(level, text_only, false, 0, pixa, blockids,
|
||||
nullptr);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the scale factor of the thresholded image that would be returned by
|
||||
* GetThresholdedImage() and the various GetX() methods that call
|
||||
* GetComponentImages().
|
||||
* Returns 0 if no thresholder has been set.
|
||||
*/
|
||||
int GetThresholdedImageScaleFactor() const;
|
||||
|
||||
/**
|
||||
* Runs page layout analysis in the mode set by SetPageSegMode.
|
||||
* May optionally be called prior to Recognize to get access to just
|
||||
* the page layout results. Returns an iterator to the results.
|
||||
* If merge_similar_words is true, words are combined where suitable for use
|
||||
* with a line recognizer. Use if you want to use AnalyseLayout to find the
|
||||
* textlines, and then want to process textline fragments with an external
|
||||
* line recognizer.
|
||||
* Returns nullptr on error or an empty page.
|
||||
* The returned iterator must be deleted after use.
|
||||
* WARNING! This class points to data held within the TessBaseAPI class, and
|
||||
* therefore can only be used while the TessBaseAPI class still exists and
|
||||
* has not been subjected to a call of Init, SetImage, Recognize, Clear, End
|
||||
* DetectOS, or anything else that changes the internal PAGE_RES.
|
||||
*/
|
||||
PageIterator *AnalyseLayout();
|
||||
PageIterator *AnalyseLayout(bool merge_similar_words);
|
||||
|
||||
/**
|
||||
* Recognize the image from SetAndThresholdImage, generating Tesseract
|
||||
* internal structures. Returns 0 on success.
|
||||
* Optional. The Get*Text functions below will call Recognize if needed.
|
||||
* After Recognize, the output is kept internally until the next SetImage.
|
||||
*/
|
||||
int Recognize(ETEXT_DESC *monitor);
|
||||
|
||||
/**
|
||||
* Methods to retrieve information after SetAndThresholdImage(),
|
||||
* Recognize() or TesseractRect(). (Recognize is called implicitly if needed.)
|
||||
*/
|
||||
|
||||
/**
|
||||
* Turns images into symbolic text.
|
||||
*
|
||||
* filename can point to a single image, a multi-page TIFF,
|
||||
* or a plain text list of image filenames.
|
||||
*
|
||||
* retry_config is useful for debugging. If not nullptr, you can fall
|
||||
* back to an alternate configuration if a page fails for some
|
||||
* reason.
|
||||
*
|
||||
* timeout_millisec terminates processing if any single page
|
||||
* takes too long. Set to 0 for unlimited time.
|
||||
*
|
||||
* renderer is responsible for creating the output. For example,
|
||||
* use the TessTextRenderer if you want plaintext output, or
|
||||
* the TessPDFRenderer to produce searchable PDF.
|
||||
*
|
||||
* If tessedit_page_number is non-negative, will only process that
|
||||
* single page. Works for multi-page tiff file, or filelist.
|
||||
*
|
||||
* Returns true if successful, false on error.
|
||||
*/
|
||||
bool ProcessPages(const char *filename, const char *retry_config,
|
||||
int timeout_millisec, TessResultRenderer *renderer);
|
||||
// Does the real work of ProcessPages.
|
||||
bool ProcessPagesInternal(const char *filename, const char *retry_config,
|
||||
int timeout_millisec, TessResultRenderer *renderer);
|
||||
|
||||
/**
|
||||
* Turn a single image into symbolic text.
|
||||
*
|
||||
* The pix is the image processed. filename and page_index are
|
||||
* metadata used by side-effect processes, such as reading a box
|
||||
* file or formatting as hOCR.
|
||||
*
|
||||
* See ProcessPages for descriptions of other parameters.
|
||||
*/
|
||||
bool ProcessPage(Pix *pix, int page_index, const char *filename,
|
||||
const char *retry_config, int timeout_millisec,
|
||||
TessResultRenderer *renderer);
|
||||
|
||||
/**
|
||||
* Get a reading-order iterator to the results of LayoutAnalysis and/or
|
||||
* Recognize. The returned iterator must be deleted after use.
|
||||
* WARNING! This class points to data held within the TessBaseAPI class, and
|
||||
* therefore can only be used while the TessBaseAPI class still exists and
|
||||
* has not been subjected to a call of Init, SetImage, Recognize, Clear, End
|
||||
* DetectOS, or anything else that changes the internal PAGE_RES.
|
||||
*/
|
||||
ResultIterator *GetIterator();
|
||||
|
||||
/**
|
||||
* Get a mutable iterator to the results of LayoutAnalysis and/or Recognize.
|
||||
* The returned iterator must be deleted after use.
|
||||
* WARNING! This class points to data held within the TessBaseAPI class, and
|
||||
* therefore can only be used while the TessBaseAPI class still exists and
|
||||
* has not been subjected to a call of Init, SetImage, Recognize, Clear, End
|
||||
* DetectOS, or anything else that changes the internal PAGE_RES.
|
||||
*/
|
||||
MutableIterator *GetMutableIterator();
|
||||
|
||||
/**
|
||||
* The recognized text is returned as a char* which is coded
|
||||
* as UTF8 and must be freed with the delete [] operator.
|
||||
*/
|
||||
char *GetUTF8Text();
|
||||
|
||||
/**
|
||||
* Make a HTML-formatted string with hOCR markup from the internal
|
||||
* data structures.
|
||||
* page_number is 0-based but will appear in the output as 1-based.
|
||||
* monitor can be used to
|
||||
* cancel the recognition
|
||||
* receive progress callbacks
|
||||
* Returned string must be freed with the delete [] operator.
|
||||
*/
|
||||
char *GetHOCRText(ETEXT_DESC *monitor, int page_number);
|
||||
|
||||
/**
|
||||
* Make a HTML-formatted string with hOCR markup from the internal
|
||||
* data structures.
|
||||
* page_number is 0-based but will appear in the output as 1-based.
|
||||
* Returned string must be freed with the delete [] operator.
|
||||
*/
|
||||
char *GetHOCRText(int page_number);
|
||||
|
||||
/**
|
||||
* Make an XML-formatted string with Alto markup from the internal
|
||||
* data structures.
|
||||
*/
|
||||
char *GetAltoText(ETEXT_DESC *monitor, int page_number);
|
||||
|
||||
/**
|
||||
* Make an XML-formatted string with Alto markup from the internal
|
||||
* data structures.
|
||||
*/
|
||||
char *GetAltoText(int page_number);
|
||||
|
||||
/**
|
||||
* Make a TSV-formatted string from the internal data structures.
|
||||
* page_number is 0-based but will appear in the output as 1-based.
|
||||
* Returned string must be freed with the delete [] operator.
|
||||
*/
|
||||
char *GetTSVText(int page_number);
|
||||
|
||||
/**
|
||||
* Make a box file for LSTM training from the internal data structures.
|
||||
* Constructs coordinates in the original image - not just the rectangle.
|
||||
* page_number is a 0-based page index that will appear in the box file.
|
||||
* Returned string must be freed with the delete [] operator.
|
||||
*/
|
||||
char *GetLSTMBoxText(int page_number);
|
||||
|
||||
/**
|
||||
* The recognized text is returned as a char* which is coded in the same
|
||||
* format as a box file used in training.
|
||||
* Constructs coordinates in the original image - not just the rectangle.
|
||||
* page_number is a 0-based page index that will appear in the box file.
|
||||
* Returned string must be freed with the delete [] operator.
|
||||
*/
|
||||
char *GetBoxText(int page_number);
|
||||
|
||||
/**
|
||||
* The recognized text is returned as a char* which is coded in the same
|
||||
* format as a WordStr box file used in training.
|
||||
* page_number is a 0-based page index that will appear in the box file.
|
||||
* Returned string must be freed with the delete [] operator.
|
||||
*/
|
||||
char *GetWordStrBoxText(int page_number);
|
||||
|
||||
/**
|
||||
* The recognized text is returned as a char* which is coded
|
||||
* as UNLV format Latin-1 with specific reject and suspect codes.
|
||||
* Returned string must be freed with the delete [] operator.
|
||||
*/
|
||||
char *GetUNLVText();
|
||||
|
||||
/**
|
||||
* Detect the orientation of the input image and apparent script (alphabet).
|
||||
* orient_deg is the detected clockwise rotation of the input image in degrees
|
||||
* (0, 90, 180, 270)
|
||||
* orient_conf is the confidence (15.0 is reasonably confident)
|
||||
* script_name is an ASCII string, the name of the script, e.g. "Latin"
|
||||
* script_conf is confidence level in the script
|
||||
* Returns true on success and writes values to each parameter as an output
|
||||
*/
|
||||
bool DetectOrientationScript(int *orient_deg, float *orient_conf,
|
||||
const char **script_name, float *script_conf);
|
||||
|
||||
/**
|
||||
* The recognized text is returned as a char* which is coded
|
||||
* as UTF8 and must be freed with the delete [] operator.
|
||||
* page_number is a 0-based page index that will appear in the osd file.
|
||||
*/
|
||||
char *GetOsdText(int page_number);
|
||||
|
||||
/** Returns the (average) confidence value between 0 and 100. */
|
||||
int MeanTextConf();
|
||||
/**
|
||||
* Returns all word confidences (between 0 and 100) in an array, terminated
|
||||
* by -1. The calling function must delete [] after use.
|
||||
* The number of confidences should correspond to the number of space-
|
||||
* delimited words in GetUTF8Text.
|
||||
*/
|
||||
int *AllWordConfidences();
|
||||
|
||||
#ifndef DISABLED_LEGACY_ENGINE
|
||||
/**
|
||||
* Applies the given word to the adaptive classifier if possible.
|
||||
* The word must be SPACE-DELIMITED UTF-8 - l i k e t h i s , so it can
|
||||
* tell the boundaries of the graphemes.
|
||||
* Assumes that SetImage/SetRectangle have been used to set the image
|
||||
* to the given word. The mode arg should be PSM_SINGLE_WORD or
|
||||
* PSM_CIRCLE_WORD, as that will be used to control layout analysis.
|
||||
* The currently set PageSegMode is preserved.
|
||||
* Returns false if adaption was not possible for some reason.
|
||||
*/
|
||||
bool AdaptToWordStr(PageSegMode mode, const char *wordstr);
|
||||
#endif // ndef DISABLED_LEGACY_ENGINE
|
||||
|
||||
/**
|
||||
* Free up recognition results and any stored image data, without actually
|
||||
* freeing any recognition data that would be time-consuming to reload.
|
||||
* Afterwards, you must call SetImage or TesseractRect before doing
|
||||
* any Recognize or Get* operation.
|
||||
*/
|
||||
void Clear();
|
||||
|
||||
/**
|
||||
* Close down tesseract and free up all memory. End() is equivalent to
|
||||
* destructing and reconstructing your TessBaseAPI.
|
||||
* Once End() has been used, none of the other API functions may be used
|
||||
* other than Init and anything declared above it in the class definition.
|
||||
*/
|
||||
void End();
|
||||
|
||||
/**
|
||||
* Clear any library-level memory caches.
|
||||
* There are a variety of expensive-to-load constant data structures (mostly
|
||||
* language dictionaries) that are cached globally -- surviving the Init()
|
||||
* and End() of individual TessBaseAPI's. This function allows the clearing
|
||||
* of these caches.
|
||||
**/
|
||||
static void ClearPersistentCache();
|
||||
|
||||
/**
|
||||
* Check whether a word is valid according to Tesseract's language model
|
||||
* @return 0 if the word is invalid, non-zero if valid.
|
||||
* @warning temporary! This function will be removed from here and placed
|
||||
* in a separate API at some future time.
|
||||
*/
|
||||
int IsValidWord(const char *word) const;
|
||||
// Returns true if utf8_character is defined in the UniCharset.
|
||||
bool IsValidCharacter(const char *utf8_character) const;
|
||||
|
||||
bool GetTextDirection(int *out_offset, float *out_slope);
|
||||
|
||||
/** Sets Dict::letter_is_okay_ function to point to the given function. */
|
||||
void SetDictFunc(DictFunc f);
|
||||
|
||||
/** Sets Dict::probability_in_context_ function to point to the given
|
||||
* function.
|
||||
*/
|
||||
void SetProbabilityInContextFunc(ProbabilityInContextFunc f);
|
||||
|
||||
/**
|
||||
* Estimates the Orientation And Script of the image.
|
||||
* @return true if the image was processed successfully.
|
||||
*/
|
||||
bool DetectOS(OSResults *);
|
||||
|
||||
/**
|
||||
* Return text orientation of each block as determined by an earlier run
|
||||
* of layout analysis.
|
||||
*/
|
||||
void GetBlockTextOrientations(int **block_orientation,
|
||||
bool **vertical_writing);
|
||||
|
||||
/** This method returns the string form of the specified unichar. */
|
||||
const char *GetUnichar(int unichar_id) const;
|
||||
|
||||
/** Return the pointer to the i-th dawg loaded into tesseract_ object. */
|
||||
const Dawg *GetDawg(int i) const;
|
||||
|
||||
/** Return the number of dawgs loaded into tesseract_ object. */
|
||||
int NumDawgs() const;
|
||||
|
||||
/** Accessor for tesseract_ (the underlying data object); returns the stored pointer as-is. */
Tesseract *tesseract() const {
|
||||
return tesseract_;
|
||||
}
|
||||
|
||||
/** Returns last_oem_requested_, i.e. the engine mode most recently requested. */
OcrEngineMode oem() const {
|
||||
return last_oem_requested_;
|
||||
}
|
||||
|
||||
void set_min_orientation_margin(double margin);
|
||||
/* @} */
|
||||
|
||||
protected:
|
||||
/** Common code for setting the image. Returns true if Init has been called.
|
||||
*/
|
||||
bool InternalSetImage();
|
||||
|
||||
/**
|
||||
* Run the thresholder to make the thresholded image. If pix is not nullptr,
|
||||
* the source is thresholded to pix instead of the internal IMAGE.
|
||||
*/
|
||||
virtual bool Threshold(Pix **pix);
|
||||
|
||||
/**
|
||||
* Find lines from the image making the BLOCK_LIST.
|
||||
* @return 0 on success.
|
||||
*/
|
||||
int FindLines();
|
||||
|
||||
/** Delete the pageres and block list ready for a new page. */
|
||||
void ClearResults();
|
||||
|
||||
/**
|
||||
* Return an LTR Result Iterator -- used only for training, as we really want
|
||||
* to ignore all BiDi smarts at that point.
|
||||
* delete once you're done with it.
|
||||
*/
|
||||
LTRResultIterator *GetLTRIterator();
|
||||
|
||||
/**
|
||||
* Return the length of the output text string, as UTF8, assuming
|
||||
* one newline per line and one per block, with a terminator,
|
||||
* and assuming a single character reject marker for each rejected character.
|
||||
* Also return the number of recognized blobs in blob_count.
|
||||
*/
|
||||
int TextLength(int *blob_count) const;
|
||||
|
||||
//// paragraphs.cpp ////////////////////////////////////////////////////
|
||||
void DetectParagraphs(bool after_text_recognition);
|
||||
|
||||
/** Read-only accessor for page_res_ (the page-level data); returns the stored pointer as-is. */
const PAGE_RES *GetPageRes() const {
|
||||
return page_res_;
|
||||
}
|
||||
|
||||
protected:
|
||||
Tesseract *tesseract_; ///< The underlying data object.
|
||||
Tesseract *osd_tesseract_; ///< For orientation & script detection.
|
||||
EquationDetect *equ_detect_; ///< The equation detector.
|
||||
FileReader reader_; ///< Reads files from any filesystem.
|
||||
ImageThresholder *thresholder_; ///< Image thresholding module.
|
||||
std::vector<ParagraphModel *> *paragraph_models_;
|
||||
BLOCK_LIST *block_list_; ///< The page layout.
|
||||
PAGE_RES *page_res_; ///< The page-level data.
|
||||
std::string input_file_; ///< Name used by training code.
|
||||
std::string output_file_; ///< Name used by debug code.
|
||||
std::string datapath_; ///< Current location of tessdata.
|
||||
std::string language_; ///< Last initialized language.
|
||||
OcrEngineMode last_oem_requested_; ///< Last ocr language mode requested.
|
||||
bool recognition_done_; ///< page_res_ contains recognition data.
|
||||
|
||||
/**
|
||||
* @defgroup ThresholderParams Thresholder Parameters
|
||||
* Parameters saved from the Thresholder. Needed to rebuild coordinates.
|
||||
*/
|
||||
/* @{ */
|
||||
int rect_left_;
|
||||
int rect_top_;
|
||||
int rect_width_;
|
||||
int rect_height_;
|
||||
int image_width_;
|
||||
int image_height_;
|
||||
/* @} */
|
||||
|
||||
private:
|
||||
// A list of image filenames gets special consideration
|
||||
bool ProcessPagesFileList(FILE *fp, std::string *buf,
|
||||
const char *retry_config, int timeout_millisec,
|
||||
TessResultRenderer *renderer,
|
||||
int tessedit_page_number);
|
||||
// TIFF supports multipage so gets special consideration.
|
||||
bool ProcessPagesMultipageTiff(const unsigned char *data, size_t size,
|
||||
const char *filename, const char *retry_config,
|
||||
int timeout_millisec,
|
||||
TessResultRenderer *renderer,
|
||||
int tessedit_page_number);
|
||||
}; // class TessBaseAPI.
|
||||
|
||||
/** Escape a char string - replace &<>"' with HTML codes. */
|
||||
std::string HOcrEscape(const char *text);
|
||||
|
||||
} // namespace tesseract
|
||||
|
||||
#endif // TESSERACT_API_BASEAPI_H_
|
|
@ -1,484 +0,0 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: capi.h
|
||||
// Description: C-API TessBaseAPI
|
||||
//
|
||||
// (C) Copyright 2012, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef API_CAPI_H_
|
||||
#define API_CAPI_H_
|
||||
|
||||
#include "export.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
# include <tesseract/baseapi.h>
|
||||
# include <tesseract/ocrclass.h>
|
||||
# include <tesseract/pageiterator.h>
|
||||
# include <tesseract/renderer.h>
|
||||
# include <tesseract/resultiterator.h>
|
||||
#endif
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Fallback boolean macros for the C API: BOOL is int, TRUE is 1, FALSE is 0.
 * All three are defined only when BOOL is not already a macro; TRUE and FALSE
 * have no guard of their own, so they are (re)defined whenever BOOL is absent. */
#ifndef BOOL
|
||||
# define BOOL int
|
||||
# define TRUE 1
|
||||
# define FALSE 0
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
typedef tesseract::TessResultRenderer TessResultRenderer;
|
||||
typedef tesseract::TessBaseAPI TessBaseAPI;
|
||||
typedef tesseract::PageIterator TessPageIterator;
|
||||
typedef tesseract::ResultIterator TessResultIterator;
|
||||
typedef tesseract::MutableIterator TessMutableIterator;
|
||||
typedef tesseract::ChoiceIterator TessChoiceIterator;
|
||||
typedef tesseract::OcrEngineMode TessOcrEngineMode;
|
||||
typedef tesseract::PageSegMode TessPageSegMode;
|
||||
typedef tesseract::PageIteratorLevel TessPageIteratorLevel;
|
||||
typedef tesseract::Orientation TessOrientation;
|
||||
typedef tesseract::ParagraphJustification TessParagraphJustification;
|
||||
typedef tesseract::WritingDirection TessWritingDirection;
|
||||
typedef tesseract::TextlineOrder TessTextlineOrder;
|
||||
typedef tesseract::PolyBlockType TessPolyBlockType;
|
||||
typedef tesseract::ETEXT_DESC ETEXT_DESC;
|
||||
#else
|
||||
typedef struct TessResultRenderer TessResultRenderer;
|
||||
typedef struct TessBaseAPI TessBaseAPI;
|
||||
typedef struct TessPageIterator TessPageIterator;
|
||||
typedef struct TessResultIterator TessResultIterator;
|
||||
typedef struct TessMutableIterator TessMutableIterator;
|
||||
typedef struct TessChoiceIterator TessChoiceIterator;
|
||||
/* C-only declaration of TessOcrEngineMode. In the __cplusplus branch above the
 * same name is an alias of tesseract::OcrEngineMode.
 * NOTE(review): presumably the enumerator order must match the C++ enum so the
 * values agree across the C/C++ boundary - verify against the C++ definition. */
typedef enum TessOcrEngineMode {
|
||||
OEM_TESSERACT_ONLY,
|
||||
OEM_LSTM_ONLY,
|
||||
OEM_TESSERACT_LSTM_COMBINED,
|
||||
OEM_DEFAULT
|
||||
} TessOcrEngineMode;
|
||||
/* C-only declaration of TessPageSegMode. In the __cplusplus branch above the
 * same name is an alias of tesseract::PageSegMode.
 * NOTE(review): presumably the enumerator order must match the C++ enum, and
 * PSM_COUNT looks like a trailing count sentinel - verify against the C++
 * definition. */
typedef enum TessPageSegMode {
|
||||
PSM_OSD_ONLY,
|
||||
PSM_AUTO_OSD,
|
||||
PSM_AUTO_ONLY,
|
||||
PSM_AUTO,
|
||||
PSM_SINGLE_COLUMN,
|
||||
PSM_SINGLE_BLOCK_VERT_TEXT,
|
||||
PSM_SINGLE_BLOCK,
|
||||
PSM_SINGLE_LINE,
|
||||
PSM_SINGLE_WORD,
|
||||
PSM_CIRCLE_WORD,
|
||||
PSM_SINGLE_CHAR,
|
||||
PSM_SPARSE_TEXT,
|
||||
PSM_SPARSE_TEXT_OSD,
|
||||
PSM_RAW_LINE,
|
||||
PSM_COUNT
|
||||
} TessPageSegMode;
|
||||
/* C-only declaration of TessPageIteratorLevel. In the __cplusplus branch above
 * the same name is an alias of tesseract::PageIteratorLevel.
 * NOTE(review): presumably the enumerator order must match the C++ enum -
 * verify against the C++ definition. */
typedef enum TessPageIteratorLevel {
|
||||
RIL_BLOCK,
|
||||
RIL_PARA,
|
||||
RIL_TEXTLINE,
|
||||
RIL_WORD,
|
||||
RIL_SYMBOL
|
||||
} TessPageIteratorLevel;
|
||||
/* C-only declaration of TessPolyBlockType. In the __cplusplus branch above the
 * same name is an alias of tesseract::PolyBlockType.
 * NOTE(review): presumably the enumerator order must match the C++ enum, and
 * PT_COUNT looks like a trailing count sentinel - verify against the C++
 * definition. */
typedef enum TessPolyBlockType {
|
||||
PT_UNKNOWN,
|
||||
PT_FLOWING_TEXT,
|
||||
PT_HEADING_TEXT,
|
||||
PT_PULLOUT_TEXT,
|
||||
PT_EQUATION,
|
||||
PT_INLINE_EQUATION,
|
||||
PT_TABLE,
|
||||
PT_VERTICAL_TEXT,
|
||||
PT_CAPTION_TEXT,
|
||||
PT_FLOWING_IMAGE,
|
||||
PT_HEADING_IMAGE,
|
||||
PT_PULLOUT_IMAGE,
|
||||
PT_HORZ_LINE,
|
||||
PT_VERT_LINE,
|
||||
PT_NOISE,
|
||||
PT_COUNT
|
||||
} TessPolyBlockType;
|
||||
/* C-only declaration of TessOrientation. In the __cplusplus branch above the
 * same name is an alias of tesseract::Orientation.
 * NOTE(review): presumably the enumerator order must match the C++ enum -
 * verify against the C++ definition. */
typedef enum TessOrientation {
|
||||
ORIENTATION_PAGE_UP,
|
||||
ORIENTATION_PAGE_RIGHT,
|
||||
ORIENTATION_PAGE_DOWN,
|
||||
ORIENTATION_PAGE_LEFT
|
||||
} TessOrientation;
|
||||
/* C-only declaration of TessParagraphJustification. In the __cplusplus branch
 * above the same name is an alias of tesseract::ParagraphJustification.
 * NOTE(review): presumably the enumerator order must match the C++ enum -
 * verify against the C++ definition. */
typedef enum TessParagraphJustification {
|
||||
JUSTIFICATION_UNKNOWN,
|
||||
JUSTIFICATION_LEFT,
|
||||
JUSTIFICATION_CENTER,
|
||||
JUSTIFICATION_RIGHT
|
||||
} TessParagraphJustification;
|
||||
/* C-only declaration of TessWritingDirection. In the __cplusplus branch above
 * the same name is an alias of tesseract::WritingDirection.
 * NOTE(review): presumably the enumerator order must match the C++ enum -
 * verify against the C++ definition. */
typedef enum TessWritingDirection {
|
||||
WRITING_DIRECTION_LEFT_TO_RIGHT,
|
||||
WRITING_DIRECTION_RIGHT_TO_LEFT,
|
||||
WRITING_DIRECTION_TOP_TO_BOTTOM
|
||||
} TessWritingDirection;
|
||||
/* C-only declaration of TessTextlineOrder. In the __cplusplus branch above the
 * same name is an alias of tesseract::TextlineOrder.
 * NOTE(review): presumably the enumerator order must match the C++ enum -
 * verify against the C++ definition. */
typedef enum TessTextlineOrder {
|
||||
TEXTLINE_ORDER_LEFT_TO_RIGHT,
|
||||
TEXTLINE_ORDER_RIGHT_TO_LEFT,
|
||||
TEXTLINE_ORDER_TOP_TO_BOTTOM
|
||||
} TessTextlineOrder;
|
||||
typedef struct ETEXT_DESC ETEXT_DESC;
|
||||
#endif
|
||||
|
||||
typedef bool (*TessCancelFunc)(void *cancel_this, int words);
|
||||
typedef bool (*TessProgressFunc)(ETEXT_DESC *ths, int left, int right, int top,
|
||||
int bottom);
|
||||
|
||||
struct Pix;
|
||||
struct Boxa;
|
||||
struct Pixa;
|
||||
|
||||
/* General free functions */
|
||||
|
||||
TESS_API const char *TessVersion();
|
||||
TESS_API void TessDeleteText(const char *text);
|
||||
TESS_API void TessDeleteTextArray(char **arr);
|
||||
TESS_API void TessDeleteIntArray(const int *arr);
|
||||
|
||||
/* Renderer API */
|
||||
TESS_API TessResultRenderer *TessTextRendererCreate(const char *outputbase);
|
||||
TESS_API TessResultRenderer *TessHOcrRendererCreate(const char *outputbase);
|
||||
TESS_API TessResultRenderer *TessHOcrRendererCreate2(const char *outputbase,
|
||||
BOOL font_info);
|
||||
TESS_API TessResultRenderer *TessAltoRendererCreate(const char *outputbase);
|
||||
TESS_API TessResultRenderer *TessTsvRendererCreate(const char *outputbase);
|
||||
TESS_API TessResultRenderer *TessPDFRendererCreate(const char *outputbase,
|
||||
const char *datadir,
|
||||
BOOL textonly);
|
||||
TESS_API TessResultRenderer *TessUnlvRendererCreate(const char *outputbase);
|
||||
TESS_API TessResultRenderer *TessBoxTextRendererCreate(const char *outputbase);
|
||||
TESS_API TessResultRenderer *TessLSTMBoxRendererCreate(const char *outputbase);
|
||||
TESS_API TessResultRenderer *TessWordStrBoxRendererCreate(
|
||||
const char *outputbase);
|
||||
|
||||
TESS_API void TessDeleteResultRenderer(TessResultRenderer *renderer);
|
||||
TESS_API void TessResultRendererInsert(TessResultRenderer *renderer,
|
||||
TessResultRenderer *next);
|
||||
TESS_API TessResultRenderer *TessResultRendererNext(
|
||||
TessResultRenderer *renderer);
|
||||
TESS_API BOOL TessResultRendererBeginDocument(TessResultRenderer *renderer,
|
||||
const char *title);
|
||||
TESS_API BOOL TessResultRendererAddImage(TessResultRenderer *renderer,
|
||||
TessBaseAPI *api);
|
||||
TESS_API BOOL TessResultRendererEndDocument(TessResultRenderer *renderer);
|
||||
|
||||
TESS_API const char *TessResultRendererExtention(TessResultRenderer *renderer);
|
||||
TESS_API const char *TessResultRendererTitle(TessResultRenderer *renderer);
|
||||
TESS_API int TessResultRendererImageNum(TessResultRenderer *renderer);
|
||||
|
||||
/* Base API */
|
||||
|
||||
TESS_API TessBaseAPI *TessBaseAPICreate();
|
||||
TESS_API void TessBaseAPIDelete(TessBaseAPI *handle);
|
||||
|
||||
TESS_API size_t TessBaseAPIGetOpenCLDevice(TessBaseAPI *handle, void **device);
|
||||
|
||||
TESS_API void TessBaseAPISetInputName(TessBaseAPI *handle, const char *name);
|
||||
TESS_API const char *TessBaseAPIGetInputName(TessBaseAPI *handle);
|
||||
|
||||
TESS_API void TessBaseAPISetInputImage(TessBaseAPI *handle, struct Pix *pix);
|
||||
TESS_API struct Pix *TessBaseAPIGetInputImage(TessBaseAPI *handle);
|
||||
|
||||
TESS_API int TessBaseAPIGetSourceYResolution(TessBaseAPI *handle);
|
||||
TESS_API const char *TessBaseAPIGetDatapath(TessBaseAPI *handle);
|
||||
|
||||
TESS_API void TessBaseAPISetOutputName(TessBaseAPI *handle, const char *name);
|
||||
|
||||
TESS_API BOOL TessBaseAPISetVariable(TessBaseAPI *handle, const char *name,
|
||||
const char *value);
|
||||
TESS_API BOOL TessBaseAPISetDebugVariable(TessBaseAPI *handle, const char *name,
|
||||
const char *value);
|
||||
|
||||
TESS_API BOOL TessBaseAPIGetIntVariable(const TessBaseAPI *handle,
|
||||
const char *name, int *value);
|
||||
TESS_API BOOL TessBaseAPIGetBoolVariable(const TessBaseAPI *handle,
|
||||
const char *name, BOOL *value);
|
||||
TESS_API BOOL TessBaseAPIGetDoubleVariable(const TessBaseAPI *handle,
|
||||
const char *name, double *value);
|
||||
TESS_API const char *TessBaseAPIGetStringVariable(const TessBaseAPI *handle,
|
||||
const char *name);
|
||||
|
||||
TESS_API void TessBaseAPIPrintVariables(const TessBaseAPI *handle, FILE *fp);
|
||||
TESS_API BOOL TessBaseAPIPrintVariablesToFile(const TessBaseAPI *handle,
|
||||
const char *filename);
|
||||
|
||||
TESS_API int TessBaseAPIInit1(TessBaseAPI *handle, const char *datapath,
|
||||
const char *language, TessOcrEngineMode oem,
|
||||
char **configs, int configs_size);
|
||||
TESS_API int TessBaseAPIInit2(TessBaseAPI *handle, const char *datapath,
|
||||
const char *language, TessOcrEngineMode oem);
|
||||
TESS_API int TessBaseAPIInit3(TessBaseAPI *handle, const char *datapath,
|
||||
const char *language);
|
||||
|
||||
TESS_API int TessBaseAPIInit4(TessBaseAPI *handle, const char *datapath,
|
||||
const char *language, TessOcrEngineMode mode,
|
||||
char **configs, int configs_size, char **vars_vec,
|
||||
char **vars_values, size_t vars_vec_size,
|
||||
BOOL set_only_non_debug_params);
|
||||
|
||||
TESS_API int TessBaseAPIInit5(TessBaseAPI *handle, const char *data, int data_size,
|
||||
const char *language, TessOcrEngineMode mode,
|
||||
char **configs, int configs_size, char **vars_vec,
|
||||
char **vars_values, size_t vars_vec_size,
|
||||
BOOL set_only_non_debug_params);
|
||||
|
||||
TESS_API const char *TessBaseAPIGetInitLanguagesAsString(
|
||||
const TessBaseAPI *handle);
|
||||
TESS_API char **TessBaseAPIGetLoadedLanguagesAsVector(
|
||||
const TessBaseAPI *handle);
|
||||
TESS_API char **TessBaseAPIGetAvailableLanguagesAsVector(
|
||||
const TessBaseAPI *handle);
|
||||
|
||||
TESS_API void TessBaseAPIInitForAnalysePage(TessBaseAPI *handle);
|
||||
|
||||
TESS_API void TessBaseAPIReadConfigFile(TessBaseAPI *handle,
|
||||
const char *filename);
|
||||
TESS_API void TessBaseAPIReadDebugConfigFile(TessBaseAPI *handle,
|
||||
const char *filename);
|
||||
|
||||
TESS_API void TessBaseAPISetPageSegMode(TessBaseAPI *handle,
|
||||
TessPageSegMode mode);
|
||||
TESS_API TessPageSegMode TessBaseAPIGetPageSegMode(const TessBaseAPI *handle);
|
||||
|
||||
TESS_API char *TessBaseAPIRect(TessBaseAPI *handle,
|
||||
const unsigned char *imagedata,
|
||||
int bytes_per_pixel, int bytes_per_line,
|
||||
int left, int top, int width, int height);
|
||||
|
||||
TESS_API void TessBaseAPIClearAdaptiveClassifier(TessBaseAPI *handle);
|
||||
|
||||
TESS_API void TessBaseAPISetImage(TessBaseAPI *handle,
|
||||
const unsigned char *imagedata, int width,
|
||||
int height, int bytes_per_pixel,
|
||||
int bytes_per_line);
|
||||
TESS_API void TessBaseAPISetImage2(TessBaseAPI *handle, struct Pix *pix);
|
||||
|
||||
TESS_API void TessBaseAPISetSourceResolution(TessBaseAPI *handle, int ppi);
|
||||
|
||||
TESS_API void TessBaseAPISetRectangle(TessBaseAPI *handle, int left, int top,
|
||||
int width, int height);
|
||||
|
||||
TESS_API struct Pix *TessBaseAPIGetThresholdedImage(TessBaseAPI *handle);
|
||||
TESS_API struct Boxa *TessBaseAPIGetRegions(TessBaseAPI *handle,
|
||||
struct Pixa **pixa);
|
||||
TESS_API struct Boxa *TessBaseAPIGetTextlines(TessBaseAPI *handle,
|
||||
struct Pixa **pixa,
|
||||
int **blockids);
|
||||
TESS_API struct Boxa *TessBaseAPIGetTextlines1(TessBaseAPI *handle,
|
||||
BOOL raw_image, int raw_padding,
|
||||
struct Pixa **pixa,
|
||||
int **blockids, int **paraids);
|
||||
TESS_API struct Boxa *TessBaseAPIGetStrips(TessBaseAPI *handle,
|
||||
struct Pixa **pixa, int **blockids);
|
||||
TESS_API struct Boxa *TessBaseAPIGetWords(TessBaseAPI *handle,
|
||||
struct Pixa **pixa);
|
||||
TESS_API struct Boxa *TessBaseAPIGetConnectedComponents(TessBaseAPI *handle,
|
||||
struct Pixa **cc);
|
||||
TESS_API struct Boxa *TessBaseAPIGetComponentImages(TessBaseAPI *handle,
|
||||
TessPageIteratorLevel level,
|
||||
BOOL text_only,
|
||||
struct Pixa **pixa,
|
||||
int **blockids);
|
||||
TESS_API struct Boxa *TessBaseAPIGetComponentImages1(
|
||||
TessBaseAPI *handle, TessPageIteratorLevel level, BOOL text_only,
|
||||
BOOL raw_image, int raw_padding, struct Pixa **pixa, int **blockids,
|
||||
int **paraids);
|
||||
|
||||
TESS_API int TessBaseAPIGetThresholdedImageScaleFactor(
|
||||
const TessBaseAPI *handle);
|
||||
|
||||
TESS_API TessPageIterator *TessBaseAPIAnalyseLayout(TessBaseAPI *handle);
|
||||
|
||||
TESS_API int TessBaseAPIRecognize(TessBaseAPI *handle, ETEXT_DESC *monitor);
|
||||
|
||||
TESS_API BOOL TessBaseAPIProcessPages(TessBaseAPI *handle, const char *filename,
|
||||
const char *retry_config,
|
||||
int timeout_millisec,
|
||||
TessResultRenderer *renderer);
|
||||
TESS_API BOOL TessBaseAPIProcessPage(TessBaseAPI *handle, struct Pix *pix,
|
||||
int page_index, const char *filename,
|
||||
const char *retry_config,
|
||||
int timeout_millisec,
|
||||
TessResultRenderer *renderer);
|
||||
|
||||
TESS_API TessResultIterator *TessBaseAPIGetIterator(TessBaseAPI *handle);
|
||||
TESS_API TessMutableIterator *TessBaseAPIGetMutableIterator(
|
||||
TessBaseAPI *handle);
|
||||
|
||||
TESS_API char *TessBaseAPIGetUTF8Text(TessBaseAPI *handle);
|
||||
TESS_API char *TessBaseAPIGetHOCRText(TessBaseAPI *handle, int page_number);
|
||||
|
||||
TESS_API char *TessBaseAPIGetAltoText(TessBaseAPI *handle, int page_number);
|
||||
TESS_API char *TessBaseAPIGetTsvText(TessBaseAPI *handle, int page_number);
|
||||
|
||||
TESS_API char *TessBaseAPIGetBoxText(TessBaseAPI *handle, int page_number);
|
||||
TESS_API char *TessBaseAPIGetLSTMBoxText(TessBaseAPI *handle, int page_number);
|
||||
TESS_API char *TessBaseAPIGetWordStrBoxText(TessBaseAPI *handle,
|
||||
int page_number);
|
||||
|
||||
TESS_API char *TessBaseAPIGetUNLVText(TessBaseAPI *handle);
|
||||
TESS_API int TessBaseAPIMeanTextConf(TessBaseAPI *handle);
|
||||
|
||||
TESS_API int *TessBaseAPIAllWordConfidences(TessBaseAPI *handle);
|
||||
|
||||
#ifndef DISABLED_LEGACY_ENGINE
|
||||
TESS_API BOOL TessBaseAPIAdaptToWordStr(TessBaseAPI *handle,
|
||||
TessPageSegMode mode,
|
||||
const char *wordstr);
|
||||
#endif // #ifndef DISABLED_LEGACY_ENGINE
|
||||
|
||||
TESS_API void TessBaseAPIClear(TessBaseAPI *handle);
|
||||
TESS_API void TessBaseAPIEnd(TessBaseAPI *handle);
|
||||
|
||||
TESS_API int TessBaseAPIIsValidWord(TessBaseAPI *handle, const char *word);
|
||||
TESS_API BOOL TessBaseAPIGetTextDirection(TessBaseAPI *handle, int *out_offset,
|
||||
float *out_slope);
|
||||
|
||||
TESS_API const char *TessBaseAPIGetUnichar(TessBaseAPI *handle, int unichar_id);
|
||||
|
||||
TESS_API void TessBaseAPIClearPersistentCache(TessBaseAPI *handle);
|
||||
|
||||
#ifndef DISABLED_LEGACY_ENGINE
|
||||
|
||||
// Call TessDeleteText(*script_name) to free memory allocated by this
|
||||
// function
|
||||
TESS_API BOOL TessBaseAPIDetectOrientationScript(TessBaseAPI *handle,
|
||||
int *orient_deg,
|
||||
float *orient_conf,
|
||||
const char **script_name,
|
||||
float *script_conf);
|
||||
#endif // #ifndef DISABLED_LEGACY_ENGINE
|
||||
|
||||
TESS_API void TessBaseAPISetMinOrientationMargin(TessBaseAPI *handle,
|
||||
double margin);
|
||||
|
||||
TESS_API int TessBaseAPINumDawgs(const TessBaseAPI *handle);
|
||||
|
||||
TESS_API TessOcrEngineMode TessBaseAPIOem(const TessBaseAPI *handle);
|
||||
|
||||
TESS_API void TessBaseGetBlockTextOrientations(TessBaseAPI *handle,
|
||||
int **block_orientation,
|
||||
bool **vertical_writing);
|
||||
|
||||
/* Page iterator */
|
||||
|
||||
TESS_API void TessPageIteratorDelete(TessPageIterator *handle);
|
||||
|
||||
TESS_API TessPageIterator *TessPageIteratorCopy(const TessPageIterator *handle);
|
||||
|
||||
TESS_API void TessPageIteratorBegin(TessPageIterator *handle);
|
||||
|
||||
TESS_API BOOL TessPageIteratorNext(TessPageIterator *handle,
|
||||
TessPageIteratorLevel level);
|
||||
|
||||
TESS_API BOOL TessPageIteratorIsAtBeginningOf(const TessPageIterator *handle,
|
||||
TessPageIteratorLevel level);
|
||||
|
||||
TESS_API BOOL TessPageIteratorIsAtFinalElement(const TessPageIterator *handle,
|
||||
TessPageIteratorLevel level,
|
||||
TessPageIteratorLevel element);
|
||||
|
||||
TESS_API BOOL TessPageIteratorBoundingBox(const TessPageIterator *handle,
|
||||
TessPageIteratorLevel level,
|
||||
int *left, int *top, int *right,
|
||||
int *bottom);
|
||||
|
||||
TESS_API TessPolyBlockType
|
||||
TessPageIteratorBlockType(const TessPageIterator *handle);
|
||||
|
||||
TESS_API struct Pix *TessPageIteratorGetBinaryImage(
|
||||
const TessPageIterator *handle, TessPageIteratorLevel level);
|
||||
|
||||
TESS_API struct Pix *TessPageIteratorGetImage(const TessPageIterator *handle,
|
||||
TessPageIteratorLevel level,
|
||||
int padding,
|
||||
struct Pix *original_image,
|
||||
int *left, int *top);
|
||||
|
||||
TESS_API BOOL TessPageIteratorBaseline(const TessPageIterator *handle,
|
||||
TessPageIteratorLevel level, int *x1,
|
||||
int *y1, int *x2, int *y2);
|
||||
|
||||
TESS_API void TessPageIteratorOrientation(
|
||||
TessPageIterator *handle, TessOrientation *orientation,
|
||||
TessWritingDirection *writing_direction, TessTextlineOrder *textline_order,
|
||||
float *deskew_angle);
|
||||
|
||||
TESS_API void TessPageIteratorParagraphInfo(
|
||||
TessPageIterator *handle, TessParagraphJustification *justification,
|
||||
BOOL *is_list_item, BOOL *is_crown, int *first_line_indent);
|
||||
|
||||
/* Result iterator */
|
||||
|
||||
TESS_API void TessResultIteratorDelete(TessResultIterator *handle);
|
||||
TESS_API TessResultIterator *TessResultIteratorCopy(
|
||||
const TessResultIterator *handle);
|
||||
TESS_API TessPageIterator *TessResultIteratorGetPageIterator(
|
||||
TessResultIterator *handle);
|
||||
TESS_API const TessPageIterator *TessResultIteratorGetPageIteratorConst(
|
||||
const TessResultIterator *handle);
|
||||
TESS_API TessChoiceIterator *TessResultIteratorGetChoiceIterator(
|
||||
const TessResultIterator *handle);
|
||||
|
||||
TESS_API BOOL TessResultIteratorNext(TessResultIterator *handle,
|
||||
TessPageIteratorLevel level);
|
||||
TESS_API char *TessResultIteratorGetUTF8Text(const TessResultIterator *handle,
|
||||
TessPageIteratorLevel level);
|
||||
TESS_API float TessResultIteratorConfidence(const TessResultIterator *handle,
|
||||
TessPageIteratorLevel level);
|
||||
TESS_API const char *TessResultIteratorWordRecognitionLanguage(
|
||||
const TessResultIterator *handle);
|
||||
TESS_API const char *TessResultIteratorWordFontAttributes(
|
||||
const TessResultIterator *handle, BOOL *is_bold, BOOL *is_italic,
|
||||
BOOL *is_underlined, BOOL *is_monospace, BOOL *is_serif, BOOL *is_smallcaps,
|
||||
int *pointsize, int *font_id);
|
||||
|
||||
TESS_API BOOL
|
||||
TessResultIteratorWordIsFromDictionary(const TessResultIterator *handle);
|
||||
TESS_API BOOL TessResultIteratorWordIsNumeric(const TessResultIterator *handle);
|
||||
TESS_API BOOL
|
||||
TessResultIteratorSymbolIsSuperscript(const TessResultIterator *handle);
|
||||
TESS_API BOOL
|
||||
TessResultIteratorSymbolIsSubscript(const TessResultIterator *handle);
|
||||
TESS_API BOOL
|
||||
TessResultIteratorSymbolIsDropcap(const TessResultIterator *handle);
|
||||
|
||||
TESS_API void TessChoiceIteratorDelete(TessChoiceIterator *handle);
|
||||
TESS_API BOOL TessChoiceIteratorNext(TessChoiceIterator *handle);
|
||||
TESS_API const char *TessChoiceIteratorGetUTF8Text(
|
||||
const TessChoiceIterator *handle);
|
||||
TESS_API float TessChoiceIteratorConfidence(const TessChoiceIterator *handle);
|
||||
|
||||
/* Progress monitor */
|
||||
|
||||
TESS_API ETEXT_DESC *TessMonitorCreate();
|
||||
TESS_API void TessMonitorDelete(ETEXT_DESC *monitor);
|
||||
TESS_API void TessMonitorSetCancelFunc(ETEXT_DESC *monitor,
|
||||
TessCancelFunc cancelFunc);
|
||||
TESS_API void TessMonitorSetCancelThis(ETEXT_DESC *monitor, void *cancelThis);
|
||||
TESS_API void *TessMonitorGetCancelThis(ETEXT_DESC *monitor);
|
||||
TESS_API void TessMonitorSetProgressFunc(ETEXT_DESC *monitor,
|
||||
TessProgressFunc progressFunc);
|
||||
TESS_API int TessMonitorGetProgress(ETEXT_DESC *monitor);
|
||||
TESS_API void TessMonitorSetDeadlineMSecs(ETEXT_DESC *monitor, int deadline);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // API_CAPI_H_
|
|
@ -1,37 +0,0 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: export.h
|
||||
// Description: Place holder
|
||||
//
|
||||
// (C) Copyright 2006, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef TESSERACT_PLATFORM_H_
|
||||
#define TESSERACT_PLATFORM_H_
|
||||
|
||||
// Selects the definition of TESS_API, unless the macro is already defined:
//  - _WIN32 or __CYGWIN__: __declspec(dllexport) when TESS_EXPORTS is defined,
//    __declspec(dllimport) when TESS_IMPORTS is defined, otherwise empty.
//  - any other platform: __attribute__((visibility("default"))) when either
//    TESS_EXPORTS or TESS_IMPORTS is defined, otherwise empty.
#ifndef TESS_API
|
||||
# if defined(_WIN32) || defined(__CYGWIN__)
|
||||
# if defined(TESS_EXPORTS)
|
||||
# define TESS_API __declspec(dllexport)
|
||||
# elif defined(TESS_IMPORTS)
|
||||
# define TESS_API __declspec(dllimport)
|
||||
# else
|
||||
# define TESS_API
|
||||
# endif
|
||||
# else
|
||||
# if defined(TESS_EXPORTS) || defined(TESS_IMPORTS)
|
||||
# define TESS_API __attribute__((visibility("default")))
|
||||
# else
|
||||
# define TESS_API
|
||||
# endif
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#endif // TESSERACT_PLATFORM_H_
|
|
@ -1,235 +0,0 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: ltrresultiterator.h
|
||||
// Description: Iterator for tesseract results in strict left-to-right
|
||||
// order that avoids using tesseract internal data structures.
|
||||
// Author: Ray Smith
|
||||
//
|
||||
// (C) Copyright 2010, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H_
|
||||
#define TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H_
|
||||
|
||||
#include "export.h" // for TESS_API
|
||||
#include "pageiterator.h" // for PageIterator
|
||||
#include "publictypes.h" // for PageIteratorLevel
|
||||
#include "unichar.h" // for StrongScriptDirection
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
class BLOB_CHOICE_IT;
|
||||
class PAGE_RES;
|
||||
class WERD_RES;
|
||||
|
||||
class Tesseract;
|
||||
|
||||
// Class to iterate over tesseract results, providing access to all levels
|
||||
// of the page hierarchy, without including any tesseract headers or having
|
||||
// to handle any tesseract structures.
|
||||
// WARNING! This class points to data held within the TessBaseAPI class, and
|
||||
// therefore can only be used while the TessBaseAPI class still exists and
|
||||
// has not been subjected to a call of Init, SetImage, Recognize, Clear, End
|
||||
// DetectOS, or anything else that changes the internal PAGE_RES.
|
||||
// See tesseract/publictypes.h for the definition of PageIteratorLevel.
|
||||
// See also base class PageIterator, which contains the bulk of the interface.
|
||||
// LTRResultIterator adds text-specific methods for access to OCR output.
|
||||
|
||||
class TESS_API LTRResultIterator : public PageIterator {
|
||||
friend class ChoiceIterator;
|
||||
|
||||
public:
|
||||
// page_res and tesseract come directly from the BaseAPI.
|
||||
// The rectangle parameters are copied indirectly from the Thresholder,
|
||||
// via the BaseAPI. They represent the coordinates of some rectangle in an
|
||||
// original image (in top-left-origin coordinates) and therefore the top-left
|
||||
// needs to be added to any output boxes in order to specify coordinates
|
||||
// in the original image. See TessBaseAPI::SetRectangle.
|
||||
// The scale and scaled_yres are in case the Thresholder scaled the image
|
||||
// rectangle prior to thresholding. Any coordinates in tesseract's image
|
||||
// must be divided by scale before adding (rect_left, rect_top).
|
||||
// The scaled_yres indicates the effective resolution of the binary image
|
||||
// that tesseract has been given by the Thresholder.
|
||||
// After the constructor, Begin has already been called.
|
||||
LTRResultIterator(PAGE_RES *page_res, Tesseract *tesseract, int scale,
|
||||
int scaled_yres, int rect_left, int rect_top,
|
||||
int rect_width, int rect_height);
|
||||
|
||||
~LTRResultIterator() override;
|
||||
|
||||
// LTRResultIterators may be copied! This makes it possible to iterate over
|
||||
// all the objects at a lower level, while maintaining an iterator to
|
||||
// objects at a higher level. These constructors DO NOT CALL Begin, so
|
||||
// iterations will continue from the location of src.
|
||||
// TODO: For now the copy constructor and operator= only need the base class
|
||||
// versions, but if new data members are added, don't forget to add them!
|
||||
|
||||
// ============= Moving around within the page ============.
|
||||
|
||||
// See PageIterator.
|
||||
|
||||
// ============= Accessing data ==============.
|
||||
|
||||
// Returns the null terminated UTF-8 encoded text string for the current
|
||||
// object at the given level. Use delete [] to free after use.
|
||||
char *GetUTF8Text(PageIteratorLevel level) const;
|
||||
|
||||
// Set the string inserted at the end of each text line. "\n" by default.
|
||||
void SetLineSeparator(const char *new_line);
|
||||
|
||||
// Set the string inserted at the end of each paragraph. "\n" by default.
|
||||
void SetParagraphSeparator(const char *new_para);
|
||||
|
||||
// Returns the mean confidence of the current object at the given level.
|
||||
// The number should be interpreted as a percent probability. (0.0f-100.0f)
|
||||
float Confidence(PageIteratorLevel level) const;
|
||||
|
||||
// ============= Functions that refer to words only ============.
|
||||
|
||||
// Returns the font attributes of the current word. If iterating at a higher
|
||||
// level object than words, eg textlines, then this will return the
|
||||
// attributes of the first word in that textline.
|
||||
// The actual return value is a string representing a font name. It points
|
||||
// to an internal table and SHOULD NOT BE DELETED. Lifespan is the same as
|
||||
// the iterator itself, ie rendered invalid by various members of
|
||||
// TessBaseAPI, including Init, SetImage, End or deleting the TessBaseAPI.
|
||||
// Pointsize is returned in printers points (1/72 inch.)
|
||||
const char *WordFontAttributes(bool *is_bold, bool *is_italic,
|
||||
bool *is_underlined, bool *is_monospace,
|
||||
bool *is_serif, bool *is_smallcaps,
|
||||
int *pointsize, int *font_id) const;
|
||||
|
||||
// Return the name of the language used to recognize this word.
|
||||
// On error, nullptr. Do not delete this pointer.
|
||||
const char *WordRecognitionLanguage() const;
|
||||
|
||||
// Return the overall directionality of this word.
|
||||
StrongScriptDirection WordDirection() const;
|
||||
|
||||
// Returns true if the current word was found in a dictionary.
|
||||
bool WordIsFromDictionary() const;
|
||||
|
||||
// Returns the number of blanks before the current word.
|
||||
int BlanksBeforeWord() const;
|
||||
|
||||
// Returns true if the current word is numeric.
|
||||
bool WordIsNumeric() const;
|
||||
|
||||
// Returns true if the word contains blamer information.
|
||||
bool HasBlamerInfo() const;
|
||||
|
||||
// Returns the pointer to ParamsTrainingBundle stored in the BlamerBundle
|
||||
// of the current word.
|
||||
const void *GetParamsTrainingBundle() const;
|
||||
|
||||
// Returns a pointer to the string with blamer information for this word.
|
||||
// Assumes that the word's blamer_bundle is not nullptr.
|
||||
const char *GetBlamerDebug() const;
|
||||
|
||||
// Returns a pointer to the string with misadaption information for this word.
|
||||
// Assumes that the word's blamer_bundle is not nullptr.
|
||||
const char *GetBlamerMisadaptionDebug() const;
|
||||
|
||||
// Returns true if a truth string was recorded for the current word.
|
||||
bool HasTruthString() const;
|
||||
|
||||
// Returns true if the given string is equivalent to the truth string for
|
||||
// the current word.
|
||||
bool EquivalentToTruth(const char *str) const;
|
||||
|
||||
// Returns a null terminated UTF-8 encoded truth string for the current word.
|
||||
// Use delete [] to free after use.
|
||||
char *WordTruthUTF8Text() const;
|
||||
|
||||
// Returns a null terminated UTF-8 encoded normalized OCR string for the
|
||||
// current word. Use delete [] to free after use.
|
||||
char *WordNormedUTF8Text() const;
|
||||
|
||||
// Returns a pointer to serialized choice lattice.
|
||||
// Fills lattice_size with the number of bytes in lattice data.
|
||||
const char *WordLattice(int *lattice_size) const;
|
||||
|
||||
// ============= Functions that refer to symbols only ============.
|
||||
|
||||
// Returns true if the current symbol is a superscript.
|
||||
// If iterating at a higher level object than symbols, eg words, then
|
||||
// this will return the attributes of the first symbol in that word.
|
||||
bool SymbolIsSuperscript() const;
|
||||
// Returns true if the current symbol is a subscript.
|
||||
// If iterating at a higher level object than symbols, eg words, then
|
||||
// this will return the attributes of the first symbol in that word.
|
||||
bool SymbolIsSubscript() const;
|
||||
// Returns true if the current symbol is a dropcap.
|
||||
// If iterating at a higher level object than symbols, eg words, then
|
||||
// this will return the attributes of the first symbol in that word.
|
||||
bool SymbolIsDropcap() const;
|
||||
|
||||
protected:
|
||||
const char *line_separator_;
|
||||
const char *paragraph_separator_;
|
||||
};
|
||||
|
||||
// Class to iterate over the classifier choices for a single RIL_SYMBOL.
|
||||
class TESS_API ChoiceIterator {
|
||||
public:
|
||||
// Construction is from a LTRResultIterator that points to the symbol of
|
||||
// interest. The ChoiceIterator allows a one-shot iteration over the
|
||||
// choices for this symbol and after that it is useless.
|
||||
explicit ChoiceIterator(const LTRResultIterator &result_it);
|
||||
~ChoiceIterator();
|
||||
|
||||
// Moves to the next choice for the symbol and returns false if there
|
||||
// are none left.
|
||||
bool Next();
|
||||
|
||||
// ============= Accessing data ==============.
|
||||
|
||||
// Returns the null terminated UTF-8 encoded text string for the current
|
||||
// choice.
|
||||
// NOTE: Unlike LTRResultIterator::GetUTF8Text, the return points to an
|
||||
// internal structure and should NOT be delete[]ed to free after use.
|
||||
const char *GetUTF8Text() const;
|
||||
|
||||
// Returns the confidence of the current choice depending on the used language
|
||||
// data. If only LSTM traineddata is used the value range is 0.0f - 1.0f. All
|
||||
// choices for one symbol should roughly add up to 1.0f.
|
||||
// If only traineddata of the legacy engine is used, the number should be
|
||||
// interpreted as a percent probability. (0.0f-100.0f) In this case
|
||||
// probabilities won't add up to 100. Each one stands on its own.
|
||||
float Confidence() const;
|
||||
|
||||
// Returns a vector containing all timesteps, which belong to the currently
|
||||
// selected symbol. A timestep is a vector containing pairs of symbols and
|
||||
// floating point numbers. The number states the probability for the
|
||||
// corresponding symbol.
|
||||
std::vector<std::vector<std::pair<const char *, float>>> *Timesteps() const;
|
||||
|
||||
private:
|
||||
// clears the remaining spaces out of the results and adapt the probabilities
|
||||
void filterSpaces();
|
||||
// Pointer to the WERD_RES object owned by the API.
|
||||
WERD_RES *word_res_;
|
||||
// Iterator over the blob choices.
|
||||
BLOB_CHOICE_IT *choice_it_;
|
||||
std::vector<std::pair<const char *, float>> *LSTM_choices_ = nullptr;
|
||||
std::vector<std::pair<const char *, float>>::iterator LSTM_choice_it_;
|
||||
|
||||
const int *tstep_index_;
|
||||
// regulates the rating granularity
|
||||
double rating_coefficient_;
|
||||
// leading blanks
|
||||
int blanks_before_word_;
|
||||
// true when there is lstm engine related trained data
|
||||
bool oemLSTM_;
|
||||
};
|
||||
|
||||
} // namespace tesseract.
|
||||
|
||||
#endif // TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H_
|
|
@ -1,158 +0,0 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
/**********************************************************************
|
||||
* File: ocrclass.h
|
||||
* Description: Class definitions and constants for the OCR API.
|
||||
* Author: Hewlett-Packard Co
|
||||
*
|
||||
* (C) Copyright 1996, Hewlett-Packard Co.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
/**********************************************************************
|
||||
* This file contains typedefs for all the structures used by
|
||||
* the HP OCR interface.
|
||||
* The structures are designed to allow them to be used with any
|
||||
* structure alignment up to 8.
|
||||
**********************************************************************/
|
||||
|
||||
#ifndef CCUTIL_OCRCLASS_H_
|
||||
#define CCUTIL_OCRCLASS_H_
|
||||
|
||||
#include <chrono>
|
||||
#include <ctime>
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
/**********************************************************************
|
||||
* EANYCODE_CHAR
|
||||
* Description of a single character. The character code is defined by
|
||||
* the character set of the current font.
|
||||
* Output text is sent as an array of these structures.
|
||||
* Spaces and line endings in the output are represented in the
|
||||
* structures of the surrounding characters. They are not directly
|
||||
* represented as characters.
|
||||
* The first character in a word has a positive value of blanks.
|
||||
* Missing information should be set to the defaults in the comments.
|
||||
* If word bounds are known, but not character bounds, then the top and
|
||||
* bottom of each character should be those of the word. The left of the
|
||||
* first and right of the last char in each word should be set. All other
|
||||
* lefts and rights should be set to -1.
|
||||
* If set, the values of right and bottom are left+width and top+height.
|
||||
* Most of the members come directly from the parameters to ocr_append_char.
|
||||
* The formatting member uses the enhancement parameter and combines the
|
||||
* line direction stuff into the top 3 bits.
|
||||
* The coding is 0=RL char, 1=LR char, 2=DR NL, 3=UL NL, 4=DR Para,
|
||||
* 5=UL Para, 6=TB char, 7=BT char. API users do not need to know what
|
||||
* the coding is, only that it is backwards compatible with the previous
|
||||
* version.
|
||||
**********************************************************************/
|
||||
|
||||
/**
 * One output character (strictly: one byte of one character, see below).
 *
 * From version 2.0 onwards char_code carries UTF-8. An ASCII character
 * therefore arrives as a single structure, whereas any other character is
 * returned as two or more instances of this structure, each holding a single
 * byte of the UTF-8 code and all sharing the same bounding box. Programs
 * that want to handle languages with different character sets need to
 * handle extended characters appropriately, but *all* code must be prepared
 * to receive UTF-8 coded characters for things such as bullets and fancy
 * quotes.
 *
 * The value in parentheses after each field is the default to use when the
 * information is missing.
 */
struct EANYCODE_CHAR {
  uint16_t char_code;  ///< the character itself
  int16_t left;        ///< left edge of the char (-1)
  int16_t right;       ///< right edge of the char (-1)
  int16_t top;         ///< top edge of the char (-1)
  int16_t bottom;      ///< bottom edge of the char (-1)
  int16_t font_index;  ///< which font (0)
  uint8_t confidence;  ///< 0 = perfect, 100 = reject (0/100)
  uint8_t point_size;  ///< size of the char, 72 = 1 inch (10)
  int8_t blanks;       ///< number of spaces before this char (1)
  uint8_t formatting;  ///< char formatting (0)
};
|
||||
|
||||
/**********************************************************************
|
||||
* ETEXT_DESC
|
||||
* Description of the output of the OCR engine.
|
||||
* This structure is used as both a progress monitor and the final
|
||||
* output header, since it needs to be a valid progress monitor while
|
||||
* the OCR engine is storing its output to shared memory.
|
||||
* During progress, all the buffer info is -1.
|
||||
* Progress starts at 0 and increases to 100 during OCR. No other constraint.
|
||||
* Additionally the progress callback contains the bounding box of the word that
|
||||
* is currently being processed.
|
||||
* Every progress callback, the OCR engine must set ocr_alive to 1.
|
||||
* The HP side will set ocr_alive to 0. Repeated failure to reset
|
||||
* to 1 indicates that the OCR engine is dead.
|
||||
* If the cancel function is not null then it is called with the number of
|
||||
* user words found. If it returns true then operation is cancelled.
|
||||
**********************************************************************/
|
||||
class ETEXT_DESC;
|
||||
|
||||
using CANCEL_FUNC = bool (*)(void *, int);
|
||||
using PROGRESS_FUNC = bool (*)(int, int, int, int, int);
|
||||
using PROGRESS_FUNC2 = bool (*)(ETEXT_DESC *, int, int, int, int);
|
||||
|
||||
class ETEXT_DESC { // output header
|
||||
public:
|
||||
int16_t count{0}; /// chars in this buffer(0)
|
||||
int16_t progress{0}; /// percent complete increasing (0-100)
|
||||
/** Progress monitor covers word recognition and it does not cover layout
|
||||
* analysis.
|
||||
* See Ray comment in https://github.com/tesseract-ocr/tesseract/pull/27 */
|
||||
int8_t more_to_come{0}; /// true if not last
|
||||
volatile int8_t ocr_alive{0}; /// ocr sets to 1, HP 0
|
||||
int8_t err_code{0}; /// for errcode use
|
||||
CANCEL_FUNC cancel{nullptr}; /// returns true to cancel
|
||||
PROGRESS_FUNC progress_callback{
|
||||
nullptr}; /// called whenever progress increases
|
||||
PROGRESS_FUNC2 progress_callback2; /// monitor-aware progress callback
|
||||
void *cancel_this{nullptr}; /// this or other data for cancel
|
||||
std::chrono::steady_clock::time_point end_time;
|
||||
/// Time to stop. Expected to be set only
|
||||
/// by call to set_deadline_msecs().
|
||||
EANYCODE_CHAR text[1]{}; /// character data
|
||||
|
||||
ETEXT_DESC() : progress_callback2(&default_progress_func) {
|
||||
end_time = std::chrono::time_point<std::chrono::steady_clock,
|
||||
std::chrono::milliseconds>();
|
||||
}
|
||||
|
||||
// Sets the end time to be deadline_msecs milliseconds from now.
|
||||
void set_deadline_msecs(int32_t deadline_msecs) {
|
||||
if (deadline_msecs > 0) {
|
||||
end_time = std::chrono::steady_clock::now() +
|
||||
std::chrono::milliseconds(deadline_msecs);
|
||||
}
|
||||
}
|
||||
|
||||
// Returns false if we've not passed the end_time, or have not set a deadline.
|
||||
bool deadline_exceeded() const {
|
||||
if (end_time.time_since_epoch() ==
|
||||
std::chrono::steady_clock::duration::zero()) {
|
||||
return false;
|
||||
}
|
||||
auto now = std::chrono::steady_clock::now();
|
||||
return (now > end_time);
|
||||
}
|
||||
|
||||
private:
|
||||
static bool default_progress_func(ETEXT_DESC *ths, int left, int right,
|
||||
int top, int bottom) {
|
||||
if (ths->progress_callback != nullptr) {
|
||||
return (*(ths->progress_callback))(ths->progress, left, right, top,
|
||||
bottom);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace tesseract
|
||||
|
||||
#endif // CCUTIL_OCRCLASS_H_
|
|
@ -1,139 +0,0 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: osdetect.h
|
||||
// Description: Orientation and script detection.
|
||||
// Author: Samuel Charron
|
||||
// Ranjith Unnikrishnan
|
||||
//
|
||||
// (C) Copyright 2008, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef TESSERACT_CCMAIN_OSDETECT_H_
|
||||
#define TESSERACT_CCMAIN_OSDETECT_H_
|
||||
|
||||
#include "export.h" // for TESS_API
|
||||
|
||||
#include <vector> // for std::vector
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
class BLOBNBOX;
|
||||
class BLOBNBOX_CLIST;
|
||||
class BLOB_CHOICE_LIST;
|
||||
class TO_BLOCK_LIST;
|
||||
class UNICHARSET;
|
||||
|
||||
class Tesseract;
|
||||
|
||||
// Upper bound on the number of scripts: the scripts in ICU, plus the "NULL"
// entry, plus Japanese and Korean, plus Fraktur.
const int kMaxNumberOfScripts = 116   // scripts in ICU
                                + 1   // "NULL"
                                + 2   // Japanese and Korean
                                + 1;  // Fraktur
|
||||
|
||||
/**
 * Best orientation/script estimate, with every field starting at zero.
 * NOTE(review): sconfidence / oconfidence presumably hold the script and
 * orientation confidences respectively - confirm against the implementation.
 */
struct OSBestResult {
  int orientation_id;
  int script_id;
  float sconfidence;
  float oconfidence;

  /** Zero-initializes both ids and both confidences. */
  OSBestResult()
      : orientation_id{0},
        script_id{0},
        sconfidence{0.0f},
        oconfidence{0.0f} {}
};
|
||||
|
||||
struct OSResults {
|
||||
OSResults() : unicharset(nullptr) {
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
for (int j = 0; j < kMaxNumberOfScripts; ++j) {
|
||||
scripts_na[i][j] = 0;
|
||||
}
|
||||
orientations[i] = 0;
|
||||
}
|
||||
}
|
||||
void update_best_orientation();
|
||||
// Set the estimate of the orientation to the given id.
|
||||
void set_best_orientation(int orientation_id);
|
||||
// Update/Compute the best estimate of the script assuming the given
|
||||
// orientation id.
|
||||
void update_best_script(int orientation_id);
|
||||
// Return the index of the script with the highest score for this orientation.
|
||||
TESS_API int get_best_script(int orientation_id) const;
|
||||
// Accumulate scores with given OSResults instance and update the best script.
|
||||
void accumulate(const OSResults &osr);
|
||||
|
||||
// Print statistics.
|
||||
void print_scores(void) const;
|
||||
void print_scores(int orientation_id) const;
|
||||
|
||||
// Array holding scores for each orientation id [0,3].
|
||||
// Orientation ids [0..3] map to [0, 270, 180, 90] degree orientations of the
|
||||
// page respectively, where the values refer to the amount of clockwise
|
||||
// rotation to be applied to the page for the text to be upright and readable.
|
||||
float orientations[4];
|
||||
// Script confidence scores for each of 4 possible orientations.
|
||||
float scripts_na[4][kMaxNumberOfScripts];
|
||||
|
||||
UNICHARSET *unicharset;
|
||||
OSBestResult best_result;
|
||||
};
|
||||
|
||||
class OrientationDetector {
|
||||
public:
|
||||
OrientationDetector(const std::vector<int> *allowed_scripts,
|
||||
OSResults *results);
|
||||
bool detect_blob(BLOB_CHOICE_LIST *scores);
|
||||
int get_orientation();
|
||||
|
||||
private:
|
||||
OSResults *osr_;
|
||||
const std::vector<int> *allowed_scripts_;
|
||||
};
|
||||
|
||||
class ScriptDetector {
|
||||
public:
|
||||
ScriptDetector(const std::vector<int> *allowed_scripts, OSResults *osr,
|
||||
tesseract::Tesseract *tess);
|
||||
void detect_blob(BLOB_CHOICE_LIST *scores);
|
||||
bool must_stop(int orientation) const;
|
||||
|
||||
private:
|
||||
OSResults *osr_;
|
||||
static const char *korean_script_;
|
||||
static const char *japanese_script_;
|
||||
static const char *fraktur_script_;
|
||||
int korean_id_;
|
||||
int japanese_id_;
|
||||
int katakana_id_;
|
||||
int hiragana_id_;
|
||||
int han_id_;
|
||||
int hangul_id_;
|
||||
int latin_id_;
|
||||
int fraktur_id_;
|
||||
tesseract::Tesseract *tess_;
|
||||
const std::vector<int> *allowed_scripts_;
|
||||
};
|
||||
|
||||
int orientation_and_script_detection(const char *filename, OSResults *,
|
||||
tesseract::Tesseract *);
|
||||
|
||||
int os_detect(TO_BLOCK_LIST *port_blocks, OSResults *osr,
|
||||
tesseract::Tesseract *tess);
|
||||
|
||||
int os_detect_blobs(const std::vector<int> *allowed_scripts,
|
||||
BLOBNBOX_CLIST *blob_list, OSResults *osr,
|
||||
tesseract::Tesseract *tess);
|
||||
|
||||
bool os_detect_blob(BLOBNBOX *bbox, OrientationDetector *o, ScriptDetector *s,
|
||||
OSResults *, tesseract::Tesseract *tess);
|
||||
|
||||
// Helper method to convert an orientation index to its value in degrees.
|
||||
// The value represents the amount of clockwise rotation in degrees that must be
|
||||
// applied for the text to be upright (readable).
|
||||
TESS_API int OrientationIdToValue(const int &id);
|
||||
|
||||
} // namespace tesseract
|
||||
|
||||
#endif // TESSERACT_CCMAIN_OSDETECT_H_
|
|
@ -1,364 +0,0 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: pageiterator.h
|
||||
// Description: Iterator for tesseract page structure that avoids using
|
||||
// tesseract internal data structures.
|
||||
// Author: Ray Smith
|
||||
//
|
||||
// (C) Copyright 2010, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef TESSERACT_CCMAIN_PAGEITERATOR_H_
|
||||
#define TESSERACT_CCMAIN_PAGEITERATOR_H_
|
||||
|
||||
#include "export.h"
|
||||
#include "publictypes.h"
|
||||
|
||||
struct Pix;
|
||||
struct Pta;
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
struct BlamerBundle;
|
||||
class C_BLOB_IT;
|
||||
class PAGE_RES;
|
||||
class PAGE_RES_IT;
|
||||
class WERD;
|
||||
|
||||
class Tesseract;
|
||||
|
||||
/**
|
||||
* Class to iterate over tesseract page structure, providing access to all
|
||||
* levels of the page hierarchy, without including any tesseract headers or
|
||||
* having to handle any tesseract structures.
|
||||
* WARNING! This class points to data held within the TessBaseAPI class, and
|
||||
* therefore can only be used while the TessBaseAPI class still exists and
|
||||
* has not been subjected to a call of Init, SetImage, Recognize, Clear, End
|
||||
* DetectOS, or anything else that changes the internal PAGE_RES.
|
||||
* See tesseract/publictypes.h for the definition of PageIteratorLevel.
|
||||
* See also ResultIterator, derived from PageIterator, which adds in the
|
||||
* ability to access OCR output with text-specific methods.
|
||||
*/
|
||||
|
||||
class TESS_API PageIterator {
|
||||
public:
|
||||
/**
|
||||
* page_res and tesseract come directly from the BaseAPI.
|
||||
* The rectangle parameters are copied indirectly from the Thresholder,
|
||||
* via the BaseAPI. They represent the coordinates of some rectangle in an
|
||||
* original image (in top-left-origin coordinates) and therefore the top-left
|
||||
* needs to be added to any output boxes in order to specify coordinates
|
||||
* in the original image. See TessBaseAPI::SetRectangle.
|
||||
* The scale and scaled_yres are in case the Thresholder scaled the image
|
||||
* rectangle prior to thresholding. Any coordinates in tesseract's image
|
||||
* must be divided by scale before adding (rect_left, rect_top).
|
||||
* The scaled_yres indicates the effective resolution of the binary image
|
||||
* that tesseract has been given by the Thresholder.
|
||||
* After the constructor, Begin has already been called.
|
||||
*/
|
||||
PageIterator(PAGE_RES *page_res, Tesseract *tesseract, int scale,
|
||||
int scaled_yres, int rect_left, int rect_top, int rect_width,
|
||||
int rect_height);
|
||||
virtual ~PageIterator();
|
||||
|
||||
/**
|
||||
* Page/ResultIterators may be copied! This makes it possible to iterate over
|
||||
* all the objects at a lower level, while maintaining an iterator to
|
||||
* objects at a higher level. These constructors DO NOT CALL Begin, so
|
||||
* iterations will continue from the location of src.
|
||||
*/
|
||||
PageIterator(const PageIterator &src);
|
||||
const PageIterator &operator=(const PageIterator &src);
|
||||
|
||||
/** Are we positioned at the same location as other? */
|
||||
bool PositionedAtSameWord(const PAGE_RES_IT *other) const;
|
||||
|
||||
// ============= Moving around within the page ============.
|
||||
|
||||
/**
|
||||
* Moves the iterator to point to the start of the page to begin an
|
||||
* iteration.
|
||||
*/
|
||||
virtual void Begin();
|
||||
|
||||
/**
|
||||
* Moves the iterator to the beginning of the paragraph.
|
||||
* This class implements this functionality by moving it to the zero indexed
|
||||
* blob of the first (leftmost) word on the first row of the paragraph.
|
||||
*/
|
||||
virtual void RestartParagraph();
|
||||
|
||||
/**
|
||||
* Return whether this iterator points anywhere in the first textline of a
|
||||
* paragraph.
|
||||
*/
|
||||
bool IsWithinFirstTextlineOfParagraph() const;
|
||||
|
||||
/**
|
||||
* Moves the iterator to the beginning of the text line.
|
||||
* This class implements this functionality by moving it to the zero indexed
|
||||
* blob of the first (leftmost) word of the row.
|
||||
*/
|
||||
virtual void RestartRow();
|
||||
|
||||
/**
|
||||
* Moves to the start of the next object at the given level in the
|
||||
* page hierarchy, and returns false if the end of the page was reached.
|
||||
* NOTE that RIL_SYMBOL will skip non-text blocks, but all other
|
||||
* PageIteratorLevel level values will visit each non-text block once.
|
||||
* Think of non text blocks as containing a single para, with a single line,
|
||||
* with a single imaginary word.
|
||||
* Calls to Next with different levels may be freely intermixed.
|
||||
* This function iterates words in right-to-left scripts correctly, if
|
||||
* the appropriate language has been loaded into Tesseract.
|
||||
*/
|
||||
virtual bool Next(PageIteratorLevel level);
|
||||
|
||||
/**
|
||||
* Returns true if the iterator is at the start of an object at the given
|
||||
* level.
|
||||
*
|
||||
* For instance, suppose an iterator it is pointed to the first symbol of the
|
||||
* first word of the third line of the second paragraph of the first block in
|
||||
* a page, then:
|
||||
* it.IsAtBeginningOf(RIL_BLOCK) = false
|
||||
* it.IsAtBeginningOf(RIL_PARA) = false
|
||||
* it.IsAtBeginningOf(RIL_TEXTLINE) = true
|
||||
* it.IsAtBeginningOf(RIL_WORD) = true
|
||||
* it.IsAtBeginningOf(RIL_SYMBOL) = true
|
||||
*/
|
||||
virtual bool IsAtBeginningOf(PageIteratorLevel level) const;
|
||||
|
||||
/**
|
||||
* Returns whether the iterator is positioned at the last element in a
|
||||
* given level. (e.g. the last word in a line, the last line in a block)
|
||||
*
|
||||
* Here's some two-paragraph example
|
||||
* text. It starts off innocuously
|
||||
* enough but quickly turns bizarre.
|
||||
* The author inserts a cornucopia
|
||||
* of words to guard against confused
|
||||
* references.
|
||||
*
|
||||
* Now take an iterator it pointed to the start of "bizarre."
|
||||
* it.IsAtFinalElement(RIL_PARA, RIL_SYMBOL) = false
|
||||
* it.IsAtFinalElement(RIL_PARA, RIL_WORD) = true
|
||||
* it.IsAtFinalElement(RIL_BLOCK, RIL_WORD) = false
|
||||
*/
|
||||
virtual bool IsAtFinalElement(PageIteratorLevel level,
|
||||
PageIteratorLevel element) const;
|
||||
|
||||
/**
|
||||
* Returns whether this iterator is positioned
|
||||
* before other: -1
|
||||
* equal to other: 0
|
||||
* after other: 1
|
||||
*/
|
||||
int Cmp(const PageIterator &other) const;
|
||||
|
||||
// ============= Accessing data ==============.
|
||||
// Coordinate system:
|
||||
// Integer coordinates are at the cracks between the pixels.
|
||||
// The top-left corner of the top-left pixel in the image is at (0,0).
|
||||
// The bottom-right corner of the bottom-right pixel in the image is at
|
||||
// (width, height).
|
||||
// Every bounding box goes from the top-left of the top-left contained
|
||||
// pixel to the bottom-right of the bottom-right contained pixel, so
|
||||
// the bounding box of the single top-left pixel in the image is:
|
||||
// (0,0)->(1,1).
|
||||
// If an image rectangle has been set in the API, then returned coordinates
|
||||
// relate to the original (full) image, rather than the rectangle.
|
||||
|
||||
/**
|
||||
* Controls what to include in a bounding box. Bounding boxes of all levels
|
||||
* between RIL_WORD and RIL_BLOCK can include or exclude potential diacritics.
|
||||
* Between layout analysis and recognition, it isn't known where all
|
||||
* diacritics belong, so this control is used to include or exclude some
|
||||
* diacritics that are above or below the main body of the word. In most cases
|
||||
* where the placement is obvious, and after recognition, it doesn't make as
|
||||
* much difference, as the diacritics will already be included in the word.
|
||||
*/
|
||||
void SetBoundingBoxComponents(bool include_upper_dots,
|
||||
bool include_lower_dots) {
|
||||
include_upper_dots_ = include_upper_dots;
|
||||
include_lower_dots_ = include_lower_dots;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the bounding rectangle of the current object at the given level.
|
||||
* See comment on coordinate system above.
|
||||
* Returns false if there is no such object at the current position.
|
||||
* The returned bounding box is guaranteed to match the size and position
|
||||
* of the image returned by GetBinaryImage, but may clip foreground pixels
|
||||
* from a grey image. The padding argument to GetImage can be used to expand
|
||||
* the image to include more foreground pixels. See GetImage below.
|
||||
*/
|
||||
bool BoundingBox(PageIteratorLevel level, int *left, int *top, int *right,
|
||||
int *bottom) const;
|
||||
bool BoundingBox(PageIteratorLevel level, int padding, int *left, int *top,
|
||||
int *right, int *bottom) const;
|
||||
/**
|
||||
* Returns the bounding rectangle of the object in a coordinate system of the
|
||||
* working image rectangle having its origin at (rect_left_, rect_top_) with
|
||||
* respect to the original image and is scaled by a factor scale_.
|
||||
*/
|
||||
bool BoundingBoxInternal(PageIteratorLevel level, int *left, int *top,
|
||||
int *right, int *bottom) const;
|
||||
|
||||
/** Returns whether there is no object of a given level. */
|
||||
bool Empty(PageIteratorLevel level) const;
|
||||
|
||||
/**
|
||||
* Returns the type of the current block.
|
||||
* See tesseract/publictypes.h for PolyBlockType.
|
||||
*/
|
||||
PolyBlockType BlockType() const;
|
||||
|
||||
/**
|
||||
* Returns the polygon outline of the current block. The returned Pta must
|
||||
* be ptaDestroy-ed after use. Note that the returned Pta lists the vertices
|
||||
* of the polygon, and the last edge is the line segment between the last
|
||||
* point and the first point. nullptr will be returned if the iterator is
|
||||
* at the end of the document or layout analysis was not used.
|
||||
*/
|
||||
Pta *BlockPolygon() const;
|
||||
|
||||
/**
|
||||
* Returns a binary image of the current object at the given level.
|
||||
* The position and size match the return from BoundingBoxInternal, and so
|
||||
* this could be upscaled with respect to the original input image.
|
||||
* Use pixDestroy to delete the image after use.
|
||||
*/
|
||||
Pix *GetBinaryImage(PageIteratorLevel level) const;
|
||||
|
||||
/**
|
||||
* Returns an image of the current object at the given level in greyscale
|
||||
* if available in the input. To guarantee a binary image use GetBinaryImage.
|
||||
* NOTE that in order to give the best possible image, the bounds are
|
||||
* expanded slightly over the binary connected component, by the supplied
|
||||
* padding, so the top-left position of the returned image is returned
|
||||
* in (left,top). These will most likely not match the coordinates
|
||||
* returned by BoundingBox.
|
||||
* If you do not supply an original image, you will get a binary one.
|
||||
* Use pixDestroy to delete the image after use.
|
||||
*/
|
||||
Pix *GetImage(PageIteratorLevel level, int padding, Pix *original_img,
|
||||
int *left, int *top) const;
|
||||
|
||||
/**
|
||||
* Returns the baseline of the current object at the given level.
|
||||
* The baseline is the line that passes through (x1, y1) and (x2, y2).
|
||||
* WARNING: with vertical text, baselines may be vertical!
|
||||
* Returns false if there is no baseline at the current position.
|
||||
*/
|
||||
bool Baseline(PageIteratorLevel level, int *x1, int *y1, int *x2,
|
||||
int *y2) const;
|
||||
|
||||
// Returns the attributes of the current row.
|
||||
void RowAttributes(float *row_height, float *descenders,
|
||||
float *ascenders) const;
|
||||
|
||||
/**
|
||||
* Returns orientation for the block the iterator points to.
|
||||
* orientation, writing_direction, textline_order: see publictypes.h
|
||||
* deskew_angle: after rotating the block so the text orientation is
|
||||
* upright, how many radians does one have to rotate the
|
||||
* block anti-clockwise for it to be level?
|
||||
* -Pi/4 <= deskew_angle <= Pi/4
|
||||
*/
|
||||
void Orientation(tesseract::Orientation *orientation,
|
||||
tesseract::WritingDirection *writing_direction,
|
||||
tesseract::TextlineOrder *textline_order,
|
||||
float *deskew_angle) const;
|
||||
|
||||
/**
|
||||
* Returns information about the current paragraph, if available.
|
||||
*
|
||||
* justification -
|
||||
* LEFT if ragged right, or fully justified and script is left-to-right.
|
||||
* RIGHT if ragged left, or fully justified and script is right-to-left.
|
||||
* unknown if it looks like source code or we have very few lines.
|
||||
* is_list_item -
|
||||
* true if we believe this is a member of an ordered or unordered list.
|
||||
* is_crown -
|
||||
* true if the first line of the paragraph is aligned with the other
|
||||
* lines of the paragraph even though subsequent paragraphs have first
|
||||
* line indents. This typically indicates that this is the continuation
|
||||
* of a previous paragraph or that it is the very first paragraph in
|
||||
* the chapter.
|
||||
* first_line_indent -
|
||||
* For LEFT aligned paragraphs, the first text line of paragraphs of
|
||||
* this kind are indented this many pixels from the left edge of the
|
||||
* rest of the paragraph.
|
||||
* for RIGHT aligned paragraphs, the first text line of paragraphs of
|
||||
* this kind are indented this many pixels from the right edge of the
|
||||
* rest of the paragraph.
|
||||
* NOTE 1: This value may be negative.
|
||||
* NOTE 2: if *is_crown == true, the first line of this paragraph is
|
||||
* actually flush, and first_line_indent is set to the "common"
|
||||
* first_line_indent for subsequent paragraphs in this block
|
||||
* of text.
|
||||
*/
|
||||
void ParagraphInfo(tesseract::ParagraphJustification *justification,
|
||||
bool *is_list_item, bool *is_crown,
|
||||
int *first_line_indent) const;
|
||||
|
||||
// If the current WERD_RES (it_->word()) is not nullptr, sets the BlamerBundle
|
||||
// of the current word to the given pointer (takes ownership of the pointer)
|
||||
// and returns true.
|
||||
// Can only be used when iterating on the word level.
|
||||
bool SetWordBlamerBundle(BlamerBundle *blamer_bundle);
|
||||
|
||||
protected:
|
||||
/**
|
||||
* Sets up the internal data for iterating the blobs of a new word, then
|
||||
* moves the iterator to the given offset.
|
||||
*/
|
||||
void BeginWord(int offset);
|
||||
|
||||
/** Pointer to the page_res owned by the API. */
|
||||
PAGE_RES *page_res_;
|
||||
/** Pointer to the Tesseract object owned by the API. */
|
||||
Tesseract *tesseract_;
|
||||
/**
|
||||
* The iterator to the page_res_. Owned by this ResultIterator.
|
||||
* A pointer just to avoid dragging in Tesseract includes.
|
||||
*/
|
||||
PAGE_RES_IT *it_;
|
||||
/**
|
||||
* The current input WERD being iterated. If there is an output from OCR,
|
||||
* then word_ is nullptr. Owned by the API
|
||||
*/
|
||||
WERD *word_;
|
||||
/** The length of the current word_. */
|
||||
int word_length_;
|
||||
/** The current blob index within the word. */
|
||||
int blob_index_;
|
||||
/**
|
||||
* Iterator to the blobs within the word. If nullptr, then we are iterating
|
||||
* OCR results in the box_word.
|
||||
* Owned by this ResultIterator.
|
||||
*/
|
||||
C_BLOB_IT *cblob_it_;
|
||||
/** Control over what to include in bounding boxes. */
|
||||
bool include_upper_dots_;
|
||||
bool include_lower_dots_;
|
||||
/** Parameters saved from the Thresholder. Needed to rebuild coordinates.*/
|
||||
int scale_;
|
||||
int scaled_yres_;
|
||||
int rect_left_;
|
||||
int rect_top_;
|
||||
int rect_width_;
|
||||
int rect_height_;
|
||||
};
|
||||
|
||||
} // namespace tesseract.
|
||||
|
||||
#endif // TESSERACT_CCMAIN_PAGEITERATOR_H_
|
|
@ -1,281 +0,0 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: publictypes.h
|
||||
// Description: Types used in both the API and internally
|
||||
// Author: Ray Smith
|
||||
//
|
||||
// (C) Copyright 2010, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef TESSERACT_CCSTRUCT_PUBLICTYPES_H_
|
||||
#define TESSERACT_CCSTRUCT_PUBLICTYPES_H_
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// This file contains types that are used both by the API and internally
|
||||
// to Tesseract. In order to decouple the API from Tesseract and prevent cyclic
|
||||
// dependencies, THIS FILE SHOULD NOT DEPEND ON ANY OTHER PART OF TESSERACT.
|
||||
// Restated: It is OK for low-level Tesseract files to include publictypes.h,
|
||||
// but not for the low-level tesseract code to include top-level API code.
|
||||
// This file should not use other Tesseract types, as that would drag
|
||||
// their includes into the API-level.
|
||||
|
||||
/** Number of printers' points in an inch. The unit of the pointsize return. */
constexpr int kPointsPerInch = 72;

/**
 * Minimum believable resolution. Used as a default if there is no other
 * information, as it is safer to under-estimate than over-estimate.
 */
constexpr int kMinCredibleResolution = 70;

/** Maximum believable resolution. */
constexpr int kMaxCredibleResolution = 2400;

/**
 * Ratio between median blob size and likely resolution. Used to estimate
 * resolution when none is provided. This is basically 1/usual text size in
 * inches.
 */
constexpr int kResolutionEstimationFactor = 10;
|
||||
|
||||
/**
 * Possible types for a POLY_BLOCK or ColPartition.
 * Must be kept in sync with kPBColors in polyblk.cpp and the PTIs*Type
 * functions below, as well as kPolyBlockNames in layout_test.cc.
 * Used extensively by ColPartition, and POLY_BLOCK.
 */
enum PolyBlockType {
  PT_UNKNOWN,         // Type is not yet known. Keep as the first element.
  PT_FLOWING_TEXT,    // Text that lives inside a column.
  PT_HEADING_TEXT,    // Text that spans more than one column.
  PT_PULLOUT_TEXT,    // Text that is in a cross-column pull-out region.
  PT_EQUATION,        // Partition belonging to an equation region.
  PT_INLINE_EQUATION, // Partition has inline equation.
  PT_TABLE,           // Partition belonging to a table region.
  PT_VERTICAL_TEXT,   // Text-line runs vertically.
  PT_CAPTION_TEXT,    // Text that belongs to an image.
  PT_FLOWING_IMAGE,   // Image that lives inside a column.
  PT_HEADING_IMAGE,   // Image that spans more than one column.
  PT_PULLOUT_IMAGE,   // Image that is in a cross-column pull-out region.
  PT_HORZ_LINE,       // Horizontal Line.
  PT_VERT_LINE,       // Vertical Line.
  PT_NOISE,           // Lies outside of any column.
  PT_COUNT            // Number of types; keep as the last element.
};

/** Returns true if PolyBlockType is a line type (horizontal or vertical). */
constexpr bool PTIsLineType(PolyBlockType type) {
  return type == PT_HORZ_LINE || type == PT_VERT_LINE;
}

/** Returns true if PolyBlockType is of image type. */
constexpr bool PTIsImageType(PolyBlockType type) {
  return type == PT_FLOWING_IMAGE || type == PT_HEADING_IMAGE ||
         type == PT_PULLOUT_IMAGE;
}

/**
 * Returns true if PolyBlockType is of text type. Tables and inline equations
 * count as text; PT_EQUATION does not.
 */
constexpr bool PTIsTextType(PolyBlockType type) {
  return type == PT_FLOWING_TEXT || type == PT_HEADING_TEXT ||
         type == PT_PULLOUT_TEXT || type == PT_TABLE ||
         type == PT_VERTICAL_TEXT || type == PT_CAPTION_TEXT ||
         type == PT_INLINE_EQUATION;
}

/** Returns true if PolyBlockType is of pullout (inter-column) type. */
constexpr bool PTIsPulloutType(PolyBlockType type) {
  return type == PT_PULLOUT_IMAGE || type == PT_PULLOUT_TEXT;
}
|
||||
|
||||
/**
|
||||
* +------------------+ Orientation Example:
|
||||
* | 1 Aaaa Aaaa Aaaa | ====================
|
||||
* | Aaa aa aaa aa | To left is a diagram of some (1) English and
|
||||
* | aaaaaa A aa aaa. | (2) Chinese text and a (3) photo credit.
|
||||
* | 2 |
|
||||
* | ####### c c C | Upright Latin characters are represented as A and a.
|
||||
* | ####### c c c | '<' represents a latin character rotated
|
||||
* | < ####### c c c | anti-clockwise 90 degrees.
|
||||
* | < ####### c c |
|
||||
* | < ####### . c | Upright Chinese characters are represented C and c.
|
||||
* | 3 ####### c |
|
||||
* +------------------+ NOTA BENE: enum values here should match goodoc.proto
|
||||
|
||||
* If you orient your head so that "up" aligns with Orientation,
|
||||
* then the characters will appear "right side up" and readable.
|
||||
*
|
||||
* In the example above, both the English and Chinese paragraphs are oriented
|
||||
* so their "up" is the top of the page (page up). The photo credit is read
|
||||
* with one's head turned leftward ("up" is to page left).
|
||||
*
|
||||
* The values of this enum match the convention of Tesseract's osdetect.h
|
||||
*/
|
||||
// Numeric values are fixed explicitly; do not reorder or renumber.
enum Orientation {
  ORIENTATION_PAGE_UP = 0,
  ORIENTATION_PAGE_RIGHT = 1,
  ORIENTATION_PAGE_DOWN = 2,
  ORIENTATION_PAGE_LEFT = 3,
};
|
||||
|
||||
/**
|
||||
* The grapheme clusters within a line of text are laid out logically
|
||||
* in this direction, judged when looking at the text line rotated so that
|
||||
* its Orientation is "page up".
|
||||
*
|
||||
* For English text, the writing direction is left-to-right. For the
|
||||
* Chinese text in the above example, the writing direction is top-to-bottom.
|
||||
*/
|
||||
// Numeric values are fixed explicitly; do not reorder or renumber.
enum WritingDirection {
  WRITING_DIRECTION_LEFT_TO_RIGHT = 0,
  WRITING_DIRECTION_RIGHT_TO_LEFT = 1,
  WRITING_DIRECTION_TOP_TO_BOTTOM = 2,
};
|
||||
|
||||
/**
|
||||
* The text lines are read in the given sequence.
|
||||
*
|
||||
* In English, the order is top-to-bottom.
|
||||
* In Chinese, vertical text lines are read right-to-left. Mongolian is
|
||||
* written in vertical columns top to bottom like Chinese, but the lines
|
||||
* are ordered left-to-right.
|
||||
*
|
||||
* Note that only some combinations make sense. For example,
|
||||
* WRITING_DIRECTION_LEFT_TO_RIGHT implies TEXTLINE_ORDER_TOP_TO_BOTTOM
|
||||
*/
|
||||
// Numeric values are fixed explicitly; do not reorder or renumber.
enum TextlineOrder {
  TEXTLINE_ORDER_LEFT_TO_RIGHT = 0,
  TEXTLINE_ORDER_RIGHT_TO_LEFT = 1,
  TEXTLINE_ORDER_TOP_TO_BOTTOM = 2,
};
|
||||
|
||||
/**
 * Possible modes for page layout analysis. These *must* be kept in order
 * of decreasing amount of layout analysis to be done, except for OSD_ONLY,
 * so that the inequality tests in the PSM_* functions below work.
 */
enum PageSegMode {
  PSM_OSD_ONLY = 0,      ///< Orientation and script detection only.
  PSM_AUTO_OSD = 1,      ///< Automatic page segmentation with orientation and
                         ///< script detection. (OSD)
  PSM_AUTO_ONLY = 2,     ///< Automatic page segmentation, but no OSD, or OCR.
  PSM_AUTO = 3,          ///< Fully automatic page segmentation, but no OSD.
  PSM_SINGLE_COLUMN = 4, ///< Assume a single column of text of variable sizes.
  PSM_SINGLE_BLOCK_VERT_TEXT = 5, ///< Assume a single uniform block of
                                  ///< vertically aligned text.
  PSM_SINGLE_BLOCK = 6, ///< Assume a single uniform block of text. (Default.)
  PSM_SINGLE_LINE = 7,  ///< Treat the image as a single text line.
  PSM_SINGLE_WORD = 8,  ///< Treat the image as a single word.
  PSM_CIRCLE_WORD = 9,  ///< Treat the image as a single word in a circle.
  PSM_SINGLE_CHAR = 10, ///< Treat the image as a single character.
  PSM_SPARSE_TEXT =
      11, ///< Find as much text as possible in no particular order.
  PSM_SPARSE_TEXT_OSD = 12, ///< Sparse text with orientation and script det.
  PSM_RAW_LINE = 13, ///< Treat the image as a single text line, bypassing
                     ///< hacks that are Tesseract-specific.

  PSM_COUNT ///< Number of enum entries.
};

/**
 * Inline functions that act on a PageSegMode to determine whether components of
 * layout analysis are enabled.
 * *Depend critically on the order of elements of PageSegMode.*
 * NOTE that arg is an int for compatibility with INT_PARAM.
 */

/** True for PSM_OSD_ONLY, PSM_AUTO_OSD and PSM_SPARSE_TEXT_OSD. */
constexpr bool PSM_OSD_ENABLED(int pageseg_mode) {
  return pageseg_mode <= PSM_AUTO_OSD || pageseg_mode == PSM_SPARSE_TEXT_OSD;
}

/** True for every mode up to and including PSM_AUTO, and for PSM_SPARSE_TEXT_OSD. */
constexpr bool PSM_ORIENTATION_ENABLED(int pageseg_mode) {
  return pageseg_mode <= PSM_AUTO || pageseg_mode == PSM_SPARSE_TEXT_OSD;
}

/** True for modes in the range PSM_AUTO_OSD .. PSM_AUTO. */
constexpr bool PSM_COL_FIND_ENABLED(int pageseg_mode) {
  return pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_AUTO;
}

/** True for the two sparse-text modes. */
constexpr bool PSM_SPARSE(int pageseg_mode) {
  return pageseg_mode == PSM_SPARSE_TEXT || pageseg_mode == PSM_SPARSE_TEXT_OSD;
}

/** True for modes in the range PSM_AUTO_OSD .. PSM_SINGLE_COLUMN. */
constexpr bool PSM_BLOCK_FIND_ENABLED(int pageseg_mode) {
  return pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_SINGLE_COLUMN;
}

/** True for modes in the range PSM_AUTO_OSD .. PSM_SINGLE_BLOCK. */
constexpr bool PSM_LINE_FIND_ENABLED(int pageseg_mode) {
  return pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_SINGLE_BLOCK;
}

/**
 * True for modes in the range PSM_AUTO_OSD .. PSM_SINGLE_LINE and for the two
 * sparse-text modes.
 */
constexpr bool PSM_WORD_FIND_ENABLED(int pageseg_mode) {
  return (pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_SINGLE_LINE) ||
         pageseg_mode == PSM_SPARSE_TEXT || pageseg_mode == PSM_SPARSE_TEXT_OSD;
}
|
||||
|
||||
/**
|
||||
* enum of the elements of the page hierarchy, used in ResultIterator
|
||||
* to provide functions that operate on each level without having to
|
||||
* have 5x as many functions.
|
||||
*/
|
||||
// Enumerators run from the coarsest level (block) to the finest (symbol).
enum PageIteratorLevel {
  RIL_BLOCK,    // Block of text/image/separator line.
  RIL_PARA,     // Paragraph within a block.
  RIL_TEXTLINE, // Line within a paragraph.
  RIL_WORD,     // Word within a textline.
  RIL_SYMBOL    // Symbol/character within a word.
};
|
||||
|
||||
/**
|
||||
* JUSTIFICATION_UNKNOWN
|
||||
* The alignment is not clearly one of the other options. This could happen
|
||||
* for example if there are only one or two lines of text or the text looks
|
||||
* like source code or poetry.
|
||||
*
|
||||
* NOTA BENE: Fully justified paragraphs (text aligned to both left and right
|
||||
* margins) are marked by Tesseract with JUSTIFICATION_LEFT if their text
|
||||
* is written with a left-to-right script and with JUSTIFICATION_RIGHT if
|
||||
* their text is written in a right-to-left script.
|
||||
*
|
||||
* Interpretation for text read in vertical lines:
|
||||
* "Left" is wherever the starting reading position is.
|
||||
*
|
||||
* JUSTIFICATION_LEFT
|
||||
* Each line, except possibly the first, is flush to the same left tab stop.
|
||||
*
|
||||
* JUSTIFICATION_CENTER
|
||||
* The text lines of the paragraph are centered about a line going
|
||||
* down through their middle of the text lines.
|
||||
*
|
||||
* JUSTIFICATION_RIGHT
|
||||
* Each line, except possibly the first, is flush to the same right tab stop.
|
||||
*/
|
||||
enum ParagraphJustification {
  JUSTIFICATION_UNKNOWN,
  JUSTIFICATION_LEFT,
  JUSTIFICATION_CENTER,
  JUSTIFICATION_RIGHT,
};
|
||||
|
||||
/**
|
||||
* When Tesseract/Cube is initialized we can choose to instantiate/load/run
|
||||
* only the Tesseract part, only the Cube part or both along with the combiner.
|
||||
* The preference of which engine to use is stored in tessedit_ocr_engine_mode.
|
||||
*
|
||||
* ATTENTION: When modifying this enum, please make sure to make the
|
||||
* appropriate changes to all the enums mirroring it (e.g. OCREngine in
|
||||
* cityblock/workflow/detection/detection_storage.proto). Such enums will
|
||||
* mention the connection to OcrEngineMode in the comments.
|
||||
*/
|
||||
enum OcrEngineMode {
  OEM_TESSERACT_ONLY,          // Run Tesseract only - fastest; deprecated
  OEM_LSTM_ONLY,               // Run just the LSTM line recognizer.
  OEM_TESSERACT_LSTM_COMBINED, // Run the LSTM recognizer, but allow fallback
                               // to Tesseract when things get difficult.
                               // deprecated
  OEM_DEFAULT,                 // Specify this mode when calling init_*(),
                               // to indicate that any of the above modes
                               // should be automatically inferred from the
                               // variables in the language-specific config,
                               // command-line configs, or if not specified
                               // in any of the above should be set to the
                               // default OEM_TESSERACT_ONLY.
  OEM_COUNT                    // Number of OEMs
};
|
||||
|
||||
} // namespace tesseract.
|
||||
|
||||
#endif // TESSERACT_CCSTRUCT_PUBLICTYPES_H_
|
|
@ -1,311 +0,0 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: renderer.h
|
||||
// Description: Rendering interface to inject into TessBaseAPI
|
||||
//
|
||||
// (C) Copyright 2011, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef TESSERACT_API_RENDERER_H_
|
||||
#define TESSERACT_API_RENDERER_H_
|
||||
|
||||
#include "export.h"
|
||||
|
||||
// To avoid collision with other typenames include the ABSOLUTE MINIMUM
|
||||
// complexity of includes here. Use forward declarations wherever possible
|
||||
// and hide includes of complex types in baseapi.cpp.
|
||||
#include <cstdint>
|
||||
#include <string> // for std::string
|
||||
#include <vector> // for std::vector
|
||||
|
||||
struct Pix;
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
class TessBaseAPI;
|
||||
|
||||
/**
|
||||
* Interface for rendering tesseract results into a document, such as text,
|
||||
* HOCR or pdf. This class is abstract. Specific classes handle individual
|
||||
* formats. This interface is then used to inject the renderer class into
|
||||
* tesseract when processing images.
|
||||
*
|
||||
* For simplicity implementing this with tesseract version 3.01,
|
||||
* the renderer contains document state that is cleared from document
|
||||
* to document just as the TessBaseAPI is. This way the base API can just
|
||||
* delegate its rendering functionality to injected renderers, and the
|
||||
* renderers can manage the associated state needed for the specific formats
|
||||
* in addition to the heuristics for producing it.
|
||||
*/
|
||||
class TESS_API TessResultRenderer {
|
||||
public:
|
||||
virtual ~TessResultRenderer();
|
||||
|
||||
// Takes ownership of pointer so must be new'd instance.
|
||||
// Renderers aren't ordered, but appends the sequences of next parameter
|
||||
// and existing next(). The renderers should be unique across both lists.
|
||||
void insert(TessResultRenderer *next);
|
||||
|
||||
// Returns the next renderer or nullptr.
|
||||
TessResultRenderer *next() {
|
||||
return next_;
|
||||
}
|
||||
|
||||
/**
|
||||
* Starts a new document with the given title.
|
||||
* This clears the contents of the output data.
|
||||
* Title should use UTF-8 encoding.
|
||||
*/
|
||||
bool BeginDocument(const char *title);
|
||||
|
||||
/**
|
||||
* Adds the recognized text from the source image to the current document.
|
||||
* Invalid if BeginDocument not yet called.
|
||||
*
|
||||
* Note that this API is a bit weird but is designed to fit into the
|
||||
* current TessBaseAPI implementation where the api has lots of state
|
||||
* information that we might want to add in.
|
||||
*/
|
||||
bool AddImage(TessBaseAPI *api);
|
||||
|
||||
/**
|
||||
* Finishes the document and finalizes the output data
|
||||
* Invalid if BeginDocument not yet called.
|
||||
*/
|
||||
bool EndDocument();
|
||||
|
||||
const char *file_extension() const {
|
||||
return file_extension_;
|
||||
}
|
||||
const char *title() const {
|
||||
return title_.c_str();
|
||||
}
|
||||
|
||||
// Is everything fine? Otherwise something went wrong.
|
||||
bool happy() const {
|
||||
return happy_;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the index of the last image given to AddImage
|
||||
* (i.e. images are incremented whether the image succeeded or not)
|
||||
*
|
||||
* This is always defined. It means either the number of the
|
||||
* current image, the last image ended, or in the completed document
|
||||
* depending on when in the document lifecycle you are looking at it.
|
||||
* Will return -1 if a document was never started.
|
||||
*/
|
||||
int imagenum() const {
|
||||
return imagenum_;
|
||||
}
|
||||
|
||||
protected:
|
||||
/**
|
||||
* Called by concrete classes.
|
||||
*
|
||||
* outputbase is the name of the output file excluding
|
||||
* extension. For example, "/path/to/chocolate-chip-cookie-recipe"
|
||||
*
|
||||
* extension indicates the file extension to be used for output
|
||||
* files. For example "pdf" will produce a .pdf file, and "hocr"
|
||||
* will produce .hocr files.
|
||||
*/
|
||||
TessResultRenderer(const char *outputbase, const char *extension);
|
||||
|
||||
// Hook for specialized handling in BeginDocument()
|
||||
virtual bool BeginDocumentHandler();
|
||||
|
||||
// This must be overridden to render the OCR'd results
|
||||
virtual bool AddImageHandler(TessBaseAPI *api) = 0;
|
||||
|
||||
// Hook for specialized handling in EndDocument()
|
||||
virtual bool EndDocumentHandler();
|
||||
|
||||
// Renderers can call this to append '\0' terminated strings into
|
||||
// the output string returned by GetOutput.
|
||||
// This method will grow the output buffer if needed.
|
||||
void AppendString(const char *s);
|
||||
|
||||
// Renderers can call this to append binary byte sequences into
|
||||
// the output string returned by GetOutput. Note that s is not necessarily
|
||||
// '\0' terminated (and can contain '\0' within it).
|
||||
// This method will grow the output buffer if needed.
|
||||
void AppendData(const char *s, int len);
|
||||
|
||||
private:
|
||||
TessResultRenderer *next_; // Can link multiple renderers together
|
||||
FILE *fout_; // output file pointer
|
||||
const char *file_extension_; // standard extension for generated output
|
||||
std::string title_; // title of document being rendered
|
||||
int imagenum_; // index of last image added
|
||||
bool happy_; // I get grumpy when the disk fills up, etc.
|
||||
};
|
||||
|
||||
/**
|
||||
* Renders tesseract output into a plain UTF-8 text string
|
||||
*/
|
||||
class TESS_API TessTextRenderer : public TessResultRenderer {
|
||||
public:
|
||||
explicit TessTextRenderer(const char *outputbase);
|
||||
|
||||
protected:
|
||||
bool AddImageHandler(TessBaseAPI *api) override;
|
||||
};
|
||||
|
||||
/**
|
||||
* Renders tesseract output into an hocr text string
|
||||
*/
|
||||
class TESS_API TessHOcrRenderer : public TessResultRenderer {
|
||||
public:
|
||||
explicit TessHOcrRenderer(const char *outputbase, bool font_info);
|
||||
explicit TessHOcrRenderer(const char *outputbase);
|
||||
|
||||
protected:
|
||||
bool BeginDocumentHandler() override;
|
||||
bool AddImageHandler(TessBaseAPI *api) override;
|
||||
bool EndDocumentHandler() override;
|
||||
|
||||
private:
|
||||
bool font_info_; // whether to print font information
|
||||
};
|
||||
|
||||
/**
|
||||
* Renders tesseract output into an alto text string
|
||||
*/
|
||||
class TESS_API TessAltoRenderer : public TessResultRenderer {
|
||||
public:
|
||||
explicit TessAltoRenderer(const char *outputbase);
|
||||
|
||||
protected:
|
||||
bool BeginDocumentHandler() override;
|
||||
bool AddImageHandler(TessBaseAPI *api) override;
|
||||
bool EndDocumentHandler() override;
|
||||
|
||||
private:
|
||||
bool begin_document;
|
||||
};
|
||||
|
||||
/**
|
||||
* Renders Tesseract output into a TSV string
|
||||
*/
|
||||
class TESS_API TessTsvRenderer : public TessResultRenderer {
|
||||
public:
|
||||
explicit TessTsvRenderer(const char *outputbase, bool font_info);
|
||||
explicit TessTsvRenderer(const char *outputbase);
|
||||
|
||||
protected:
|
||||
bool BeginDocumentHandler() override;
|
||||
bool AddImageHandler(TessBaseAPI *api) override;
|
||||
bool EndDocumentHandler() override;
|
||||
|
||||
private:
|
||||
bool font_info_; // whether to print font information
|
||||
};
|
||||
|
||||
/**
|
||||
* Renders tesseract output into searchable PDF
|
||||
*/
|
||||
class TESS_API TessPDFRenderer : public TessResultRenderer {
|
||||
public:
|
||||
// datadir is the location of the TESSDATA. We need it because
|
||||
// we load a custom PDF font from this location.
|
||||
TessPDFRenderer(const char *outputbase, const char *datadir,
|
||||
bool textonly = false);
|
||||
|
||||
protected:
|
||||
bool BeginDocumentHandler() override;
|
||||
bool AddImageHandler(TessBaseAPI *api) override;
|
||||
bool EndDocumentHandler() override;
|
||||
|
||||
private:
|
||||
// We don't want to have every image in memory at once,
|
||||
// so we store some metadata as we go along producing
|
||||
// PDFs one page at a time. At the end, that metadata is
|
||||
// used to make everything that isn't easily handled in a
|
||||
// streaming fashion.
|
||||
long int obj_; // counter for PDF objects
|
||||
std::vector<uint64_t> offsets_; // offset of every PDF object in bytes
|
||||
std::vector<long int> pages_; // object number for every /Page object
|
||||
std::string datadir_; // where to find the custom font
|
||||
bool textonly_; // skip images if set
|
||||
// Bookkeeping only. DIY = Do It Yourself.
|
||||
void AppendPDFObjectDIY(size_t objectsize);
|
||||
// Bookkeeping + emit data.
|
||||
void AppendPDFObject(const char *data);
|
||||
// Create the /Contents object for an entire page.
|
||||
char *GetPDFTextObjects(TessBaseAPI *api, double width, double height);
|
||||
// Turn an image into a PDF object. Only transcode if we have to.
|
||||
static bool imageToPDFObj(Pix *pix, const char *filename, long int objnum,
|
||||
char **pdf_object, long int *pdf_object_size,
|
||||
int jpg_quality);
|
||||
};
|
||||
|
||||
/**
|
||||
* Renders tesseract output into a plain UTF-8 text string
|
||||
*/
|
||||
class TESS_API TessUnlvRenderer : public TessResultRenderer {
|
||||
public:
|
||||
explicit TessUnlvRenderer(const char *outputbase);
|
||||
|
||||
protected:
|
||||
bool AddImageHandler(TessBaseAPI *api) override;
|
||||
};
|
||||
|
||||
/**
|
||||
* Renders tesseract output into a plain UTF-8 text string for LSTMBox
|
||||
*/
|
||||
class TESS_API TessLSTMBoxRenderer : public TessResultRenderer {
|
||||
public:
|
||||
explicit TessLSTMBoxRenderer(const char *outputbase);
|
||||
|
||||
protected:
|
||||
bool AddImageHandler(TessBaseAPI *api) override;
|
||||
};
|
||||
|
||||
/**
|
||||
* Renders tesseract output into a plain UTF-8 text string
|
||||
*/
|
||||
class TESS_API TessBoxTextRenderer : public TessResultRenderer {
|
||||
public:
|
||||
explicit TessBoxTextRenderer(const char *outputbase);
|
||||
|
||||
protected:
|
||||
bool AddImageHandler(TessBaseAPI *api) override;
|
||||
};
|
||||
|
||||
/**
|
||||
* Renders tesseract output into a plain UTF-8 text string in WordStr format
|
||||
*/
|
||||
class TESS_API TessWordStrBoxRenderer : public TessResultRenderer {
|
||||
public:
|
||||
explicit TessWordStrBoxRenderer(const char *outputbase);
|
||||
|
||||
protected:
|
||||
bool AddImageHandler(TessBaseAPI *api) override;
|
||||
};
|
||||
|
||||
#ifndef DISABLED_LEGACY_ENGINE
|
||||
|
||||
/**
|
||||
* Renders tesseract output into an osd text string
|
||||
*/
|
||||
class TESS_API TessOsdRenderer : public TessResultRenderer {
|
||||
public:
|
||||
explicit TessOsdRenderer(const char *outputbase);
|
||||
|
||||
protected:
|
||||
bool AddImageHandler(TessBaseAPI *api) override;
|
||||
};
|
||||
|
||||
#endif // ndef DISABLED_LEGACY_ENGINE
|
||||
|
||||
} // namespace tesseract.
|
||||
|
||||
#endif // TESSERACT_API_RENDERER_H_
|
|
@ -1,250 +0,0 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: resultiterator.h
|
||||
// Description: Iterator for tesseract results that is capable of
|
||||
// iterating in proper reading order over Bi Directional
|
||||
// (e.g. mixed Hebrew and English) text.
|
||||
// Author: David Eger
|
||||
//
|
||||
// (C) Copyright 2011, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef TESSERACT_CCMAIN_RESULT_ITERATOR_H_
|
||||
#define TESSERACT_CCMAIN_RESULT_ITERATOR_H_
|
||||
|
||||
#include "export.h" // for TESS_API, TESS_LOCAL
|
||||
#include "ltrresultiterator.h" // for LTRResultIterator
|
||||
#include "publictypes.h" // for PageIteratorLevel
|
||||
#include "unichar.h" // for StrongScriptDirection
|
||||
|
||||
#include <set> // for std::pair
|
||||
#include <vector> // for std::vector
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
/**
 * Iterator over Tesseract results that walks the page in proper reading
 * order, including bidirectional text (e.g. mixed Hebrew and English).
 *
 * The class is copy constructible: the compiler-generated copy constructor
 * is sufficient.
 */
class TESS_API ResultIterator : public LTRResultIterator {
public:
  /**
   * Factory that builds a ResultIterator from an LTRResultIterator.
   * As documented on the constructor, the new iterator is reset to the
   * beginning of the paragraph rather than staying where resit pointed.
   * NOTE(review): ownership of the returned pointer is not stated here --
   * presumably the caller deletes it; confirm.
   */
  static ResultIterator *StartOfParagraph(const LTRResultIterator &resit);

  ~ResultIterator() override = default;

  // ============= Moving around within the page ============.

  /**
   * Repositions the iterator at the start of the page so that an iteration
   * can begin.
   */
  void Begin() override;

  /**
   * Advances to the start of the next object at the given level of the page
   * hierarchy, following the appropriate reading order. Returns false once
   * the end of the page has been reached.
   *
   * RIL_SYMBOL skips non-text blocks; every other PageIteratorLevel value
   * visits each non-text block exactly once. A non-text block can be thought
   * of as holding one paragraph with one line with one imaginary word.
   *
   * Calls with different levels may be freely intermixed. Words in
   * right-to-left scripts are iterated correctly provided the appropriate
   * language has been loaded into Tesseract.
   */
  bool Next(PageIteratorLevel level) override;

  /**
   * Reports whether the iterator sits at the *logical* beginning of the
   * given level (as opposed to the left-to-right, top-to-bottom order).
   * Otherwise behaves like PageIterator::IsAtBeginningOf(); see
   * pageiterator.h for the full description.
   */
  bool IsAtBeginningOf(PageIteratorLevel level) const override;

  /**
   * BiDi-aware implementation of PageIterator's IsAtFinalElement.
   * For example, IsAtFinalElement(RIL_PARA, RIL_WORD) tells whether the
   * iterator points at the last word of a paragraph. See PageIterator for
   * the full comment.
   */
  bool IsAtFinalElement(PageIteratorLevel level,
                        PageIteratorLevel element) const override;

  // ============= Functions that refer to words only ============.

  /** Returns the number of blanks before the current word. */
  int BlanksBeforeWord() const;

  // ============= Accessing data ==============.

  /**
   * Returns the null-terminated, UTF-8 encoded text of the current object at
   * the given level. The caller must release it with delete [].
   */
  virtual char *GetUTF8Text(PageIteratorLevel level) const;

  /**
   * Returns the LSTM choices for every LSTM timestep of the current word.
   * NOTE(review): ownership/lifetime of the returned pointer is not
   * documented here -- confirm before deleting or caching it.
   */
  virtual std::vector<std::vector<std::vector<std::pair<const char *, float>>>>
      *GetRawLSTMTimesteps() const;

  /**
   * NOTE(review): undocumented in the original header; by name this returns
   * the best LSTM choices per symbol for the current word. Pointer ownership
   * is likewise unspecified -- confirm against the implementation.
   */
  virtual std::vector<std::vector<std::pair<const char *, float>>>
      *GetBestLSTMSymbolChoices() const;

  /**
   * Returns true when the dominant reading direction of the current
   * paragraph is left-to-right, false when it is right-to-left.
   */
  bool ParagraphIsLtr() const;

  // ============= Exposed only for testing =============.

  /**
   * Produces the reading order of a set of words (supplied left-to-right) as
   * a sequence of word indices plus optional meta-marks.
   *
   * Meta-marks are encoded as negative values:
   *   kMinorRunStart  Start of minor direction text.
   *   kMinorRunEnd    End of minor direction text.
   *   kComplexWord    The next indexed word contains both left-to-right and
   *                   right-to-left characters and was treated as neutral.
   *
   * Example: five words on a text line, indexed [0,1,2,3,4] from the
   * leftmost side. All of the following are plausible reading orders:
   *
   *   Left-to-Right (in ltr paragraph):
   *     { 0, 1, 2, 3, 4 }
   *   Left-to-Right (in rtl paragraph):
   *     { kMinorRunStart, 0, 1, 2, 3, 4, kMinorRunEnd }
   *   Right-to-Left (in rtl paragraph):
   *     { 4, 3, 2, 1, 0 }
   *   Left-to-Right except for an RTL phrase in words 2, 3 in an ltr
   *   paragraph:
   *     { 0, 1, kMinorRunStart, 3, 2, kMinorRunEnd, 4 }
   */
  static void CalculateTextlineOrder(
      bool paragraph_is_ltr,
      const std::vector<StrongScriptDirection> &word_dirs,
      std::vector<int> *reading_order);

  // Meta-mark values used in reading-order sequences (negative, per the
  // comment above); defined out of line.
  static const int kMinorRunStart;
  static const int kMinorRunEnd;
  static const int kComplexWord;

protected:
  /**
   * Constructs from an existing iterator. The data associated with resit is
   * presumed to outlive this object.
   *
   * NB: this constructor is not public because it does something
   * non-obvious: it resets to the beginning of the paragraph instead of
   * staying wherever resit might have pointed.
   */
  explicit ResultIterator(const LTRResultIterator &resit);

private:
  /**
   * Computes the dominant writing direction of the current paragraph.
   * Members should normally read current_paragraph_is_ltr_ instead.
   */
  bool CurrentParagraphIsLtr() const;

  /**
   * Computes the reading order of the words of a text line, given an
   * iterator into the middle of that line. Word indices are measured from
   * resit->RestartRow() = index 0.
   *
   * Besides non-negative word indices, these negative values may appear:
   *   kMinorRunStart  Start of minor direction text.
   *   kMinorRunEnd    End of minor direction text.
   *   kComplexWord    The previous word contains both left-to-right and
   *                   right-to-left characters and was treated as neutral.
   *
   * NOTE(review): the public static overload documents kComplexWord as
   * describing the *next* indexed word, whereas this comment says the
   * *previous* word -- confirm which is correct.
   */
  void CalculateTextlineOrder(bool paragraph_is_ltr,
                              const LTRResultIterator &resit,
                              std::vector<int> *indices) const;

  /** Same as above, but also fills in the caller's ssd when ssd != nullptr. */
  void CalculateTextlineOrder(bool paragraph_is_ltr,
                              const LTRResultIterator &resit,
                              std::vector<StrongScriptDirection> *ssd,
                              std::vector<int> *indices) const;

  /**
   * Returns the index of the current word in a strict left-to-right reading
   * of the row.
   */
  int LTRWordIndex() const;

  /**
   * For an iterator pointing at a word, yields the logical reading order of
   * the word's blob indices.
   */
  void CalculateBlobOrder(std::vector<int> *blob_indices) const;

  /** Precondition: current_paragraph_is_ltr_ is set. */
  void MoveToLogicalStartOfTextline();

  /**
   * Precondition: current_paragraph_is_ltr_ and in_minor_direction_ are set.
   */
  void MoveToLogicalStartOfWord();

  /** Is the iterator at the final (reading order) symbol of the word? */
  bool IsAtFinalSymbolOfWord() const;

  /** Is the iterator at the first (reading order) symbol of the word? */
  bool IsAtFirstSymbolOfWord() const;

  /**
   * Appends whatever extra marks belong after this word when it is printed;
   * mostly Unicode BiDi control characters.
   */
  void AppendSuffixMarks(std::string *text) const;

  /** Appends the current word, in reading order, to the given buffer. */
  void AppendUTF8WordText(std::string *text) const;

  /**
   * Appends the text of the current text line, *assuming the iterator is
   * positioned at the beginning of that text line*. Afterwards the iterator
   * points at the first position past the text line.
   * Each text line ends in a single newline; a text line that ends a
   * paragraph gets a second newline.
   */
  void IterateAndAppendUTF8TextlineText(std::string *text);

  /**
   * Appends the text of the current paragraph, in reading order, to the
   * given buffer. Each text line ends in a single newline and the paragraph
   * receives one extra newline at the end.
   */
  void AppendUTF8ParagraphText(std::string *text) const;

  /** Returns whether the bidi_debug flag is set to at least min_level. */
  bool BidiDebug(int min_level) const;

  // Cached result for the current paragraph; see CurrentParagraphIsLtr().
  bool current_paragraph_is_ltr_;

  /**
   * Is the currently pointed-at character at the beginning of a
   * minor-direction run?
   */
  bool at_beginning_of_minor_run_;

  /** Is the currently pointed-at character in a minor-direction sequence? */
  bool in_minor_direction_;

  /**
   * Should detected inter-word spaces be preserved, or "compressed" to a
   * single space character (the default behavior)?
   */
  bool preserve_interword_spaces_;
};
|
||||
|
||||
} // namespace tesseract.
|
||||
|
||||
#endif // TESSERACT_CCMAIN_RESULT_ITERATOR_H_
|
|
@ -1,174 +0,0 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: unichar.h
|
||||
// Description: Unicode character/ligature class.
|
||||
// Author: Ray Smith
|
||||
//
|
||||
// (C) Copyright 2006, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef TESSERACT_CCUTIL_UNICHAR_H_
|
||||
#define TESSERACT_CCUTIL_UNICHAR_H_
|
||||
|
||||
#include "export.h"
|
||||
|
||||
#include <memory.h>
|
||||
#include <cstring>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Size, in chars, of the storage inside a UNICHAR (its last element may
// double as a length field). Must be at least 4, and must not exceed 31
// unless the coding of the length is changed.
#define UNICHAR_LEN 30

// A UNICHAR_ID is the unique integer id of a unichar.
using UNICHAR_ID = int;

// Sentinel id denoting an invalid or uninitialized unichar.
static const int INVALID_UNICHAR_ID = -1;
// Special unichar text that corresponds to INVALID_UNICHAR_ID.
static const char INVALID_UNICHAR[] = "__INVALID_UNICHAR__";

// Classification of a piece of text by the directionality of its characters.
enum StrongScriptDirection {
  DIR_NEUTRAL = 0,       // Text contains only neutral characters.
  DIR_LEFT_TO_RIGHT = 1, // Text contains no Right-to-Left characters.
  DIR_RIGHT_TO_LEFT = 2, // Text contains no Left-to-Right characters.
  DIR_MIX = 3,           // Text contains a mixture of left-to-right
                         // and right-to-left characters.
};

// Integer type used for a single Unicode code point (see
// UNICHAR::UTF8ToUTF32 / UTF32ToUTF8).
using char32 = signed int;
|
||||
|
||||
// The UNICHAR class holds a single classification result. This may be
|
||||
// a single Unicode character (stored as between 1 and 4 utf8 bytes) or
|
||||
// multiple Unicode characters representing the NFKC expansion of a ligature
|
||||
// such as fi, ffl etc. These are also stored as utf8.
|
||||
class TESS_API UNICHAR {
|
||||
public:
|
||||
UNICHAR() {
|
||||
memset(chars, 0, UNICHAR_LEN);
|
||||
}
|
||||
|
||||
// Construct from a utf8 string. If len<0 then the string is null terminated.
|
||||
// If the string is too long to fit in the UNICHAR then it takes only what
|
||||
// will fit.
|
||||
UNICHAR(const char *utf8_str, int len);
|
||||
|
||||
// Construct from a single UCS4 character.
|
||||
explicit UNICHAR(int unicode);
|
||||
|
||||
// Default copy constructor and operator= are OK.
|
||||
|
||||
// Get the first character as UCS-4.
|
||||
int first_uni() const;
|
||||
|
||||
// Get the length of the UTF8 string.
|
||||
int utf8_len() const {
|
||||
int len = chars[UNICHAR_LEN - 1];
|
||||
return len >= 0 && len < UNICHAR_LEN ? len : UNICHAR_LEN;
|
||||
}
|
||||
|
||||
// Get a UTF8 string, but NOT nullptr terminated.
|
||||
const char *utf8() const {
|
||||
return chars;
|
||||
}
|
||||
|
||||
// Get a terminated UTF8 string: Must delete[] it after use.
|
||||
char *utf8_str() const;
|
||||
|
||||
// Get the number of bytes in the first character of the given utf8 string.
|
||||
static int utf8_step(const char *utf8_str);
|
||||
|
||||
// A class to simplify iterating over and accessing elements of a UTF8
|
||||
// string. Note that unlike the UNICHAR class, const_iterator does NOT COPY or
|
||||
// take ownership of the underlying byte array. It also does not permit
|
||||
// modification of the array (as the name suggests).
|
||||
//
|
||||
// Example:
|
||||
// for (UNICHAR::const_iterator it = UNICHAR::begin(str, str_len);
|
||||
// it != UNICHAR::end(str, len);
|
||||
// ++it) {
|
||||
// printf("UCS-4 symbol code = %d\n", *it);
|
||||
// char buf[5];
|
||||
// int char_len = it.get_utf8(buf); buf[char_len] = '\0';
|
||||
// printf("Char = %s\n", buf);
|
||||
// }
|
||||
class TESS_API const_iterator {
|
||||
using CI = const_iterator;
|
||||
|
||||
public:
|
||||
// Step to the next UTF8 character.
|
||||
// If the current position is at an illegal UTF8 character, then print an
|
||||
// error message and step by one byte. If the current position is at a
|
||||
// nullptr value, don't step past it.
|
||||
const_iterator &operator++();
|
||||
|
||||
// Return the UCS-4 value at the current position.
|
||||
// If the current position is at an illegal UTF8 value, return a single
|
||||
// space character.
|
||||
int operator*() const;
|
||||
|
||||
// Store the UTF-8 encoding of the current codepoint into buf, which must be
|
||||
// at least 4 bytes long. Return the number of bytes written.
|
||||
// If the current position is at an illegal UTF8 value, writes a single
|
||||
// space character and returns 1.
|
||||
// Note that this method does not null-terminate the buffer.
|
||||
int get_utf8(char *buf) const;
|
||||
// Returns the number of bytes of the current codepoint. Returns 1 if the
|
||||
// current position is at an illegal UTF8 value.
|
||||
int utf8_len() const;
|
||||
// Returns true if the UTF-8 encoding at the current position is legal.
|
||||
bool is_legal() const;
|
||||
|
||||
// Return the pointer into the string at the current position.
|
||||
const char *utf8_data() const {
|
||||
return it_;
|
||||
}
|
||||
|
||||
// Iterator equality operators.
|
||||
friend bool operator==(const CI &lhs, const CI &rhs) {
|
||||
return lhs.it_ == rhs.it_;
|
||||
}
|
||||
friend bool operator!=(const CI &lhs, const CI &rhs) {
|
||||
return !(lhs == rhs);
|
||||
}
|
||||
|
||||
private:
|
||||
friend class UNICHAR;
|
||||
explicit const_iterator(const char *it) : it_(it) {}
|
||||
|
||||
const char *it_; // Pointer into the string.
|
||||
};
|
||||
|
||||
// Create a start/end iterator pointing to a string. Note that these methods
|
||||
// are static and do NOT create a copy or take ownership of the underlying
|
||||
// array.
|
||||
static const_iterator begin(const char *utf8_str, int byte_length);
|
||||
static const_iterator end(const char *utf8_str, int byte_length);
|
||||
|
||||
// Converts a utf-8 string to a vector of unicodes.
|
||||
// Returns an empty vector if the input contains invalid UTF-8.
|
||||
static std::vector<char32> UTF8ToUTF32(const char *utf8_str);
|
||||
// Converts a vector of unicodes to a utf8 string.
|
||||
// Returns an empty string if the input contains an invalid unicode.
|
||||
static std::string UTF32ToUTF8(const std::vector<char32> &str32);
|
||||
|
||||
private:
|
||||
// A UTF-8 representation of 1 or more Unicode characters.
|
||||
// The last element (chars[UNICHAR_LEN - 1]) is a length if
|
||||
// its value < UNICHAR_LEN, otherwise it is a genuine character.
|
||||
char chars[UNICHAR_LEN]{};
|
||||
};
|
||||
|
||||
} // namespace tesseract
|
||||
|
||||
#endif // TESSERACT_CCUTIL_UNICHAR_H_
|
|
@ -1,34 +0,0 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: version.h
|
||||
// Description: Version information
|
||||
//
|
||||
// (C) Copyright 2018, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef TESSERACT_API_VERSION_H_
|
||||
#define TESSERACT_API_VERSION_H_
|
||||
|
||||
// clang-format off
|
||||
|
||||
// Numeric version components. The @...@ tokens are template placeholders.
// NOTE(review): presumably substituted by the build system when the real
// version.h is generated -- confirm.
#define TESSERACT_MAJOR_VERSION @GENERIC_MAJOR_VERSION@
#define TESSERACT_MINOR_VERSION @GENERIC_MINOR_VERSION@
#define TESSERACT_MICRO_VERSION @GENERIC_MICRO_VERSION@

// Single integer packing the three components: major in bits 16 and up,
// minor in bits 8-15, micro in bits 0-7. Every operand is parenthesized so
// the value does not depend on what the placeholders expand to.
#define TESSERACT_VERSION                 \
  (((TESSERACT_MAJOR_VERSION) << 16) |    \
   ((TESSERACT_MINOR_VERSION) << 8) |     \
   (TESSERACT_MICRO_VERSION))

// Version as a string literal (placeholder, see the note above).
#define TESSERACT_VERSION_STR "@PACKAGE_VERSION@"
|
||||
|
||||
// clang-format on
|
||||
|
||||
#endif // TESSERACT_API_VERSION_H_
|
|
@ -1,812 +0,0 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: baseapi.h
|
||||
// Description: Simple API for calling tesseract.
|
||||
// Author: Ray Smith
|
||||
//
|
||||
// (C) Copyright 2006, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef TESSERACT_API_BASEAPI_H_
|
||||
#define TESSERACT_API_BASEAPI_H_
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
# include "config_auto.h" // DISABLED_LEGACY_ENGINE
|
||||
#endif
|
||||
|
||||
#include "export.h"
|
||||
#include "pageiterator.h"
|
||||
#include "publictypes.h"
|
||||
#include "resultiterator.h"
|
||||
#include "unichar.h"
|
||||
|
||||
#include "version.h"
|
||||
|
||||
#include <cstdio>
|
||||
#include <vector> // for std::vector
|
||||
|
||||
// Opaque forward declarations: this header only handles these types through
// pointers, so their full definitions are not needed here.
// (Comments on the TessBaseAPI methods describe Boxa/Pixa as leptonica-style.)
struct Pix;
struct Pixa;
struct Boxa;
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Forward declarations of types that this header refers to only by pointer
// or reference, which avoids pulling in their full definitions.
class PAGE_RES;
class ParagraphModel;
class BLOCK_LIST;
class ETEXT_DESC;
struct OSResults;
class UNICHARSET;

class Dawg;
class Dict;
class EquationDetect;
class PageIterator;
class ImageThresholder;
class LTRResultIterator;
class ResultIterator;
class MutableIterator;
class TessResultRenderer;
class Tesseract;
|
||||
|
||||
// Pointer to a function that reads the whole of the file `filename` into
// `*data`. The function returns false on failure.
using FileReader = bool (*)(const char *filename, std::vector<char> *data);
|
||||
|
||||
// Pointer to a const member function of Dict taking
// (void *, const UNICHARSET &, UNICHAR_ID, bool) and returning int.
using DictFunc = int (Dict::*)(void *, const UNICHARSET &, UNICHAR_ID,
                               bool) const;

// Pointer to a non-const member function of Dict taking
// (const char *, const char *, int, const char *, int) and returning double.
// NOTE(review): the parameter meanings are not documented in this header --
// see the Dict implementation.
using ProbabilityInContextFunc = double (Dict::*)(const char *, const char *,
                                                  int, const char *, int);
|
||||
|
||||
/**
|
||||
* Base class for all tesseract APIs.
|
||||
* Specific classes can add ability to work on different inputs or produce
|
||||
* different outputs.
|
||||
* This class is mostly an interface layer on top of the Tesseract instance
|
||||
* class to hide the data types so that users of this class don't have to
|
||||
* include any other Tesseract headers.
|
||||
*/
|
||||
class TESS_API TessBaseAPI {
|
||||
public:
|
||||
TessBaseAPI();
|
||||
virtual ~TessBaseAPI();
|
||||
// Copy constructor and assignment operator are currently unsupported.
|
||||
TessBaseAPI(TessBaseAPI const &) = delete;
|
||||
TessBaseAPI &operator=(TessBaseAPI const &) = delete;
|
||||
|
||||
/**
|
||||
* Returns the version identifier as a static string. Do not delete.
|
||||
*/
|
||||
static const char *Version();
|
||||
|
||||
/**
|
||||
* If compiled with OpenCL AND an available OpenCL
|
||||
* device is deemed faster than serial code, then
|
||||
* "device" is populated with the cl_device_id
|
||||
* and returns sizeof(cl_device_id)
|
||||
* otherwise *device=nullptr and returns 0.
|
||||
*/
|
||||
static size_t getOpenCLDevice(void **device);
|
||||
|
||||
/**
|
||||
* Set the name of the input file. Needed for training and
|
||||
* reading a UNLV zone file, and for searchable PDF output.
|
||||
*/
|
||||
void SetInputName(const char *name);
|
||||
/**
|
||||
* These functions are required for searchable PDF output.
|
||||
* We need our hands on the input file so that we can include
|
||||
* it in the PDF without transcoding. If that is not possible,
|
||||
* we need the original image. Finally, resolution metadata
|
||||
* is stored in the PDF so we need that as well.
|
||||
*/
|
||||
const char *GetInputName();
|
||||
// Takes ownership of the input pix.
|
||||
void SetInputImage(Pix *pix);
|
||||
Pix *GetInputImage();
|
||||
int GetSourceYResolution();
|
||||
const char *GetDatapath();
|
||||
|
||||
/** Set the name of the bonus output files. Needed only for debugging. */
|
||||
void SetOutputName(const char *name);
|
||||
|
||||
/**
|
||||
* Set the value of an internal "parameter."
|
||||
* Supply the name of the parameter and the value as a string, just as
|
||||
* you would in a config file.
|
||||
* Returns false if the name lookup failed.
|
||||
* Eg SetVariable("tessedit_char_blacklist", "xyz"); to ignore x, y and z.
|
||||
* Or SetVariable("classify_bln_numeric_mode", "1"); to set numeric-only mode.
|
||||
* SetVariable may be used before Init, but settings will revert to
|
||||
* defaults on End().
|
||||
*
|
||||
* Note: Must be called after Init(). Only works for non-init variables
|
||||
* (init variables should be passed to Init()).
|
||||
*/
|
||||
bool SetVariable(const char *name, const char *value);
|
||||
bool SetDebugVariable(const char *name, const char *value);
|
||||
|
||||
/**
|
||||
* Returns true if the parameter was found among Tesseract parameters.
|
||||
* Fills in value with the value of the parameter.
|
||||
*/
|
||||
bool GetIntVariable(const char *name, int *value) const;
|
||||
bool GetBoolVariable(const char *name, bool *value) const;
|
||||
bool GetDoubleVariable(const char *name, double *value) const;
|
||||
|
||||
/**
|
||||
* Returns the pointer to the string that represents the value of the
|
||||
* parameter if it was found among Tesseract parameters.
|
||||
*/
|
||||
const char *GetStringVariable(const char *name) const;
|
||||
|
||||
#ifndef DISABLED_LEGACY_ENGINE
|
||||
|
||||
/**
|
||||
* Print Tesseract fonts table to the given file.
|
||||
*/
|
||||
void PrintFontsTable(FILE *fp) const;
|
||||
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Print Tesseract parameters to the given file.
|
||||
*/
|
||||
void PrintVariables(FILE *fp) const;
|
||||
|
||||
/**
|
||||
* Get value of named variable as a string, if it exists.
|
||||
*/
|
||||
bool GetVariableAsString(const char *name, std::string *val) const;
|
||||
|
||||
/**
|
||||
* Instances are now mostly thread-safe and totally independent,
|
||||
* but some global parameters remain. Basically it is safe to use multiple
|
||||
* TessBaseAPIs in different threads in parallel, UNLESS:
|
||||
* you use SetVariable on some of the Params in classify and textord.
|
||||
* If you do, then the effect will be to change it for all your instances.
|
||||
*
|
||||
* Start tesseract. Returns zero on success and -1 on failure.
|
||||
* NOTE that the only members that may be called before Init are those
|
||||
* listed above here in the class definition.
|
||||
*
|
||||
* The datapath must be the name of the tessdata directory.
|
||||
* The language is (usually) an ISO 639-3 string or nullptr will default to
|
||||
* eng. It is entirely safe (and eventually will be efficient too) to call
|
||||
* Init multiple times on the same instance to change language, or just
|
||||
* to reset the classifier.
|
||||
* The language may be a string of the form [~]<lang>[+[~]<lang>]* indicating
|
||||
* that multiple languages are to be loaded. Eg hin+eng will load Hindi and
|
||||
* English. Languages may specify internally that they want to be loaded
|
||||
* with one or more other languages, so the ~ sign is available to override
|
||||
* that. Eg if hin were set to load eng by default, then hin+~eng would force
|
||||
* loading only hin. The number of loaded languages is limited only by
|
||||
* memory, with the caveat that loading additional languages will impact
|
||||
* both speed and accuracy, as there is more work to do to decide on the
|
||||
* applicable language, and there is more chance of hallucinating incorrect
|
||||
* words.
|
||||
* WARNING: On changing languages, all Tesseract parameters are reset
|
||||
* back to their default values. (Which may vary between languages.)
|
||||
* If you have a rare need to set a Variable that controls
|
||||
* initialization for a second call to Init you should explicitly
|
||||
* call End() and then use SetVariable before Init. This is only a very
|
||||
* rare use case, since there are very few uses that require any parameters
|
||||
* to be set before Init.
|
||||
*
|
||||
* If set_only_non_debug_params is true, only params that do not contain
|
||||
* "debug" in the name will be set.
|
||||
*/
|
||||
int Init(const char *datapath, const char *language, OcrEngineMode mode,
|
||||
char **configs, int configs_size,
|
||||
const std::vector<std::string> *vars_vec,
|
||||
const std::vector<std::string> *vars_values,
|
||||
bool set_only_non_debug_params);
|
||||
int Init(const char *datapath, const char *language, OcrEngineMode oem) {
|
||||
return Init(datapath, language, oem, nullptr, 0, nullptr, nullptr, false);
|
||||
}
|
||||
int Init(const char *datapath, const char *language) {
|
||||
return Init(datapath, language, OEM_DEFAULT, nullptr, 0, nullptr, nullptr,
|
||||
false);
|
||||
}
|
||||
// In-memory version reads the traineddata file directly from the given
|
||||
// data[data_size] array, and/or reads data via a FileReader.
|
||||
int Init(const char *data, int data_size, const char *language,
|
||||
OcrEngineMode mode, char **configs, int configs_size,
|
||||
const std::vector<std::string> *vars_vec,
|
||||
const std::vector<std::string> *vars_values,
|
||||
bool set_only_non_debug_params, FileReader reader);
|
||||
|
||||
/**
|
||||
* Returns the languages string used in the last valid initialization.
|
||||
* If the last initialization specified "deu+hin" then that will be
|
||||
* returned. If hin loaded eng automatically as well, then that will
|
||||
* not be included in this list. To find the languages actually
|
||||
* loaded use GetLoadedLanguagesAsVector.
|
||||
* The returned string should NOT be deleted.
|
||||
*/
|
||||
const char *GetInitLanguagesAsString() const;
|
||||
|
||||
/**
|
||||
* Returns the loaded languages in the vector of std::string.
|
||||
* Includes all languages loaded by the last Init, including those loaded
|
||||
* as dependencies of other loaded languages.
|
||||
*/
|
||||
void GetLoadedLanguagesAsVector(std::vector<std::string> *langs) const;
|
||||
|
||||
/**
|
||||
* Returns the available languages in the sorted vector of std::string.
|
||||
*/
|
||||
void GetAvailableLanguagesAsVector(std::vector<std::string> *langs) const;
|
||||
|
||||
/**
|
||||
* Init only for page layout analysis. Use only for calls to SetImage and
|
||||
* AnalysePage. Calls that attempt recognition will generate an error.
|
||||
*/
|
||||
void InitForAnalysePage();
|
||||
|
||||
/**
|
||||
* Read a "config" file containing a set of param, value pairs.
|
||||
* Searches the standard places: tessdata/configs, tessdata/tessconfigs
|
||||
* and also accepts a relative or absolute path name.
|
||||
* Note: only non-init params will be set (init params are set by Init()).
|
||||
*/
|
||||
void ReadConfigFile(const char *filename);
|
||||
/** Same as above, but only set debug params from the given config file. */
|
||||
void ReadDebugConfigFile(const char *filename);
|
||||
|
||||
/**
|
||||
* Set the current page segmentation mode. Defaults to PSM_SINGLE_BLOCK.
|
||||
* The mode is stored as an IntParam so it can also be modified by
|
||||
* ReadConfigFile or SetVariable("tessedit_pageseg_mode", mode as string).
|
||||
*/
|
||||
void SetPageSegMode(PageSegMode mode);
|
||||
|
||||
/** Return the current page segmentation mode. */
|
||||
PageSegMode GetPageSegMode() const;
|
||||
|
||||
/**
|
||||
* Recognize a rectangle from an image and return the result as a string.
|
||||
* May be called many times for a single Init.
|
||||
* Currently has no error checking.
|
||||
* Greyscale of 8 and color of 24 or 32 bits per pixel may be given.
|
||||
* Palette color images will not work properly and must be converted to
|
||||
* 24 bit.
|
||||
* Binary images of 1 bit per pixel may also be given but they must be
|
||||
* byte packed with the MSB of the first byte being the first pixel, and a
|
||||
* 1 represents WHITE. For binary images set bytes_per_pixel=0.
|
||||
* The recognized text is returned as a char* which is coded
|
||||
* as UTF8 and must be freed with the delete [] operator.
|
||||
*
|
||||
* Note that TesseractRect is the simplified convenience interface.
|
||||
* For advanced uses, use SetImage, (optionally) SetRectangle, Recognize,
|
||||
* and one or more of the Get*Text functions below.
|
||||
*/
|
||||
char *TesseractRect(const unsigned char *imagedata, int bytes_per_pixel,
|
||||
int bytes_per_line, int left, int top, int width,
|
||||
int height);
|
||||
|
||||
/**
|
||||
* Call between pages or documents etc to free up memory and forget
|
||||
* adaptive data.
|
||||
*/
|
||||
void ClearAdaptiveClassifier();
|
||||
|
||||
/**
|
||||
* @defgroup AdvancedAPI Advanced API
|
||||
* The following methods break TesseractRect into pieces, so you can
|
||||
* get hold of the thresholded image, get the text in different formats,
|
||||
* get bounding boxes, confidences etc.
|
||||
*/
|
||||
/* @{ */
|
||||
|
||||
/**
|
||||
* Provide an image for Tesseract to recognize. Format is as
|
||||
* TesseractRect above. Copies the image buffer and converts to Pix.
|
||||
* SetImage clears all recognition results, and sets the rectangle to the
|
||||
* full image, so it may be followed immediately by a GetUTF8Text, and it
|
||||
* will automatically perform recognition.
|
||||
*/
|
||||
void SetImage(const unsigned char *imagedata, int width, int height,
|
||||
int bytes_per_pixel, int bytes_per_line);
|
||||
|
||||
/**
|
||||
* Provide an image for Tesseract to recognize. As with SetImage above,
|
||||
* Tesseract takes its own copy of the image, so it need not persist until
|
||||
* after Recognize.
|
||||
* Pix vs raw, which to use?
|
||||
* Use Pix where possible. Tesseract uses Pix as its internal representation
|
||||
* and it is therefore more efficient to provide a Pix directly.
|
||||
*/
|
||||
void SetImage(Pix *pix);
|
||||
|
||||
/**
|
||||
* Set the resolution of the source image in pixels per inch so font size
|
||||
* information can be calculated in results. Call this after SetImage().
|
||||
*/
|
||||
void SetSourceResolution(int ppi);
|
||||
|
||||
/**
|
||||
* Restrict recognition to a sub-rectangle of the image. Call after SetImage.
|
||||
* Each SetRectangle clears the recognition results so multiple rectangles
|
||||
* can be recognized with the same image.
|
||||
*/
|
||||
void SetRectangle(int left, int top, int width, int height);
|
||||
|
||||
/**
|
||||
* Get a copy of the internal thresholded image from Tesseract.
|
||||
* Caller takes ownership of the Pix and must pixDestroy it.
|
||||
* May be called any time after SetImage, or after TesseractRect.
|
||||
*/
|
||||
Pix *GetThresholdedImage();
|
||||
|
||||
/**
|
||||
* Get the result of page layout analysis as a leptonica-style
|
||||
* Boxa, Pixa pair, in reading order.
|
||||
* Can be called before or after Recognize.
|
||||
*/
|
||||
Boxa *GetRegions(Pixa **pixa);
|
||||
|
||||
/**
|
||||
* Get the textlines as a leptonica-style
|
||||
* Boxa, Pixa pair, in reading order.
|
||||
* Can be called before or after Recognize.
|
||||
* If raw_image is true, then extract from the original image instead of the
|
||||
* thresholded image and pad by raw_padding pixels.
|
||||
* If blockids is not nullptr, the block-id of each line is also returned as
|
||||
* an array of one element per line. delete [] after use. If paraids is not
|
||||
* nullptr, the paragraph-id of each line within its block is also returned as
|
||||
* an array of one element per line. delete [] after use.
|
||||
*/
|
||||
Boxa *GetTextlines(bool raw_image, int raw_padding, Pixa **pixa,
|
||||
int **blockids, int **paraids);
|
||||
/*
|
||||
Helper method to extract from the thresholded image. (most common usage)
|
||||
*/
|
||||
Boxa *GetTextlines(Pixa **pixa, int **blockids) {
|
||||
// Forward to the five-argument overload with raw_image = false,
// raw_padding = 0 and paraids = nullptr (no raw-image extraction or padding,
// and no paragraph ids requested).
return GetTextlines(false, 0, pixa, blockids, nullptr);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get textlines and strips of image regions as a leptonica-style Boxa, Pixa
|
||||
* pair, in reading order. Enables downstream handling of non-rectangular
|
||||
* regions.
|
||||
* Can be called before or after Recognize.
|
||||
* If blockids is not nullptr, the block-id of each line is also returned as
|
||||
* an array of one element per line. delete [] after use.
|
||||
*/
|
||||
Boxa *GetStrips(Pixa **pixa, int **blockids);
|
||||
|
||||
/**
|
||||
* Get the words as a leptonica-style
|
||||
* Boxa, Pixa pair, in reading order.
|
||||
* Can be called before or after Recognize.
|
||||
*/
|
||||
Boxa *GetWords(Pixa **pixa);
|
||||
|
||||
/**
|
||||
* Gets the individual connected (text) components (created
|
||||
* after pages segmentation step, but before recognition)
|
||||
* as a leptonica-style Boxa, Pixa pair, in reading order.
|
||||
* Can be called before or after Recognize.
|
||||
* Note: the caller is responsible for calling boxaDestroy()
|
||||
* on the returned Boxa array and pixaDestroy() on cc array.
|
||||
*/
|
||||
Boxa *GetConnectedComponents(Pixa **cc);
|
||||
|
||||
/**
|
||||
* Get the given level kind of components (block, textline, word etc.) as a
|
||||
* leptonica-style Boxa, Pixa pair, in reading order.
|
||||
* Can be called before or after Recognize.
|
||||
* If blockids is not nullptr, the block-id of each component is also returned
|
||||
* as an array of one element per component. delete [] after use.
|
||||
* If paraids is not nullptr, the paragraph-id of each component within its
|
||||
* block is also returned as an array of one element per component. delete []
|
||||
* after use. If raw_image is true, then portions of the original image are
|
||||
* extracted instead of the thresholded image and padded with raw_padding. If
|
||||
* text_only is true, then only text components are returned.
|
||||
*/
|
||||
Boxa *GetComponentImages(PageIteratorLevel level, bool text_only,
|
||||
bool raw_image, int raw_padding, Pixa **pixa,
|
||||
int **blockids, int **paraids);
|
||||
// Helper function to get binary images with no padding (most common usage).
|
||||
Boxa *GetComponentImages(const PageIteratorLevel level, const bool text_only,
|
||||
Pixa **pixa, int **blockids) {
|
||||
// Forward to the seven-argument overload with raw_image = false,
// raw_padding = 0 and paraids = nullptr; level, text_only, pixa and blockids
// are passed through unchanged.
return GetComponentImages(level, text_only, false, 0, pixa, blockids,
|
||||
nullptr);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the scale factor of the thresholded image that would be returned by
|
||||
* GetThresholdedImage() and the various GetX() methods that call
|
||||
* GetComponentImages().
|
||||
* Returns 0 if no thresholder has been set.
|
||||
*/
|
||||
int GetThresholdedImageScaleFactor() const;
|
||||
|
||||
/**
|
||||
* Runs page layout analysis in the mode set by SetPageSegMode.
|
||||
* May optionally be called prior to Recognize to get access to just
|
||||
* the page layout results. Returns an iterator to the results.
|
||||
* If merge_similar_words is true, words are combined where suitable for use
|
||||
* with a line recognizer. Use if you want to use AnalyseLayout to find the
|
||||
* textlines, and then want to process textline fragments with an external
|
||||
* line recognizer.
|
||||
* Returns nullptr on error or an empty page.
|
||||
* The returned iterator must be deleted after use.
|
||||
* WARNING! This class points to data held within the TessBaseAPI class, and
|
||||
* therefore can only be used while the TessBaseAPI class still exists and
|
||||
* has not been subjected to a call of Init, SetImage, Recognize, Clear, End,
|
||||
* DetectOS, or anything else that changes the internal PAGE_RES.
|
||||
*/
|
||||
PageIterator *AnalyseLayout();
|
||||
PageIterator *AnalyseLayout(bool merge_similar_words);
|
||||
|
||||
/**
|
||||
* Recognize the image from SetAndThresholdImage, generating Tesseract
|
||||
* internal structures. Returns 0 on success.
|
||||
* Optional. The Get*Text functions below will call Recognize if needed.
|
||||
* After Recognize, the output is kept internally until the next SetImage.
|
||||
*/
|
||||
int Recognize(ETEXT_DESC *monitor);
|
||||
|
||||
/**
|
||||
* Methods to retrieve information after SetAndThresholdImage(),
|
||||
* Recognize() or TesseractRect(). (Recognize is called implicitly if needed.)
|
||||
*/
|
||||
|
||||
/**
|
||||
* Turns images into symbolic text.
|
||||
*
|
||||
* filename can point to a single image, a multi-page TIFF,
|
||||
* or a plain text list of image filenames.
|
||||
*
|
||||
* retry_config is useful for debugging. If not nullptr, you can fall
|
||||
* back to an alternate configuration if a page fails for some
|
||||
* reason.
|
||||
*
|
||||
* timeout_millisec terminates processing if any single page
|
||||
* takes too long. Set to 0 for unlimited time.
|
||||
*
|
||||
* renderer is responsible for creating the output. For example,
|
||||
* use the TessTextRenderer if you want plaintext output, or
|
||||
* the TessPDFRenderer to produce searchable PDF.
|
||||
*
|
||||
* If tessedit_page_number is non-negative, will only process that
|
||||
* single page. Works for multi-page tiff file, or filelist.
|
||||
*
|
||||
* Returns true if successful, false on error.
|
||||
*/
|
||||
bool ProcessPages(const char *filename, const char *retry_config,
|
||||
int timeout_millisec, TessResultRenderer *renderer);
|
||||
// Does the real work of ProcessPages.
|
||||
bool ProcessPagesInternal(const char *filename, const char *retry_config,
|
||||
int timeout_millisec, TessResultRenderer *renderer);
|
||||
|
||||
/**
|
||||
* Turn a single image into symbolic text.
|
||||
*
|
||||
* The pix is the image processed. filename and page_index are
|
||||
* metadata used by side-effect processes, such as reading a box
|
||||
* file or formatting as hOCR.
|
||||
*
|
||||
* See ProcessPages for descriptions of other parameters.
|
||||
*/
|
||||
bool ProcessPage(Pix *pix, int page_index, const char *filename,
|
||||
const char *retry_config, int timeout_millisec,
|
||||
TessResultRenderer *renderer);
|
||||
|
||||
/**
|
||||
* Get a reading-order iterator to the results of LayoutAnalysis and/or
|
||||
* Recognize. The returned iterator must be deleted after use.
|
||||
* WARNING! This class points to data held within the TessBaseAPI class, and
|
||||
* therefore can only be used while the TessBaseAPI class still exists and
|
||||
* has not been subjected to a call of Init, SetImage, Recognize, Clear, End,
|
||||
* DetectOS, or anything else that changes the internal PAGE_RES.
|
||||
*/
|
||||
ResultIterator *GetIterator();
|
||||
|
||||
/**
|
||||
* Get a mutable iterator to the results of LayoutAnalysis and/or Recognize.
|
||||
* The returned iterator must be deleted after use.
|
||||
* WARNING! This class points to data held within the TessBaseAPI class, and
|
||||
* therefore can only be used while the TessBaseAPI class still exists and
|
||||
* has not been subjected to a call of Init, SetImage, Recognize, Clear, End,
|
||||
* DetectOS, or anything else that changes the internal PAGE_RES.
|
||||
*/
|
||||
MutableIterator *GetMutableIterator();
|
||||
|
||||
/**
|
||||
* The recognized text is returned as a char* which is coded
|
||||
* as UTF8 and must be freed with the delete [] operator.
|
||||
*/
|
||||
char *GetUTF8Text();
|
||||
|
||||
/**
|
||||
* Make a HTML-formatted string with hOCR markup from the internal
|
||||
* data structures.
|
||||
* page_number is 0-based but will appear in the output as 1-based.
|
||||
* monitor can be used to
|
||||
* cancel the recognition
|
||||
* receive progress callbacks
|
||||
* Returned string must be freed with the delete [] operator.
|
||||
*/
|
||||
char *GetHOCRText(ETEXT_DESC *monitor, int page_number);
|
||||
|
||||
/**
|
||||
* Make a HTML-formatted string with hOCR markup from the internal
|
||||
* data structures.
|
||||
* page_number is 0-based but will appear in the output as 1-based.
|
||||
* Returned string must be freed with the delete [] operator.
|
||||
*/
|
||||
char *GetHOCRText(int page_number);
|
||||
|
||||
/**
|
||||
* Make an XML-formatted string with Alto markup from the internal
|
||||
* data structures.
|
||||
*/
|
||||
char *GetAltoText(ETEXT_DESC *monitor, int page_number);
|
||||
|
||||
/**
|
||||
* Make an XML-formatted string with Alto markup from the internal
|
||||
* data structures.
|
||||
*/
|
||||
char *GetAltoText(int page_number);
|
||||
|
||||
/**
|
||||
* Make a TSV-formatted string from the internal data structures.
|
||||
* page_number is 0-based but will appear in the output as 1-based.
|
||||
* Returned string must be freed with the delete [] operator.
|
||||
*/
|
||||
char *GetTSVText(int page_number);
|
||||
|
||||
/**
|
||||
* Make a box file for LSTM training from the internal data structures.
|
||||
* Constructs coordinates in the original image - not just the rectangle.
|
||||
* page_number is a 0-based page index that will appear in the box file.
|
||||
* Returned string must be freed with the delete [] operator.
|
||||
*/
|
||||
char *GetLSTMBoxText(int page_number);
|
||||
|
||||
/**
|
||||
* The recognized text is returned as a char* which is coded in the same
|
||||
* format as a box file used in training.
|
||||
* Constructs coordinates in the original image - not just the rectangle.
|
||||
* page_number is a 0-based page index that will appear in the box file.
|
||||
* Returned string must be freed with the delete [] operator.
|
||||
*/
|
||||
char *GetBoxText(int page_number);
|
||||
|
||||
/**
|
||||
* The recognized text is returned as a char* which is coded in the same
|
||||
* format as a WordStr box file used in training.
|
||||
* page_number is a 0-based page index that will appear in the box file.
|
||||
* Returned string must be freed with the delete [] operator.
|
||||
*/
|
||||
char *GetWordStrBoxText(int page_number);
|
||||
|
||||
/**
|
||||
* The recognized text is returned as a char* which is coded
|
||||
* as UNLV format Latin-1 with specific reject and suspect codes.
|
||||
* Returned string must be freed with the delete [] operator.
|
||||
*/
|
||||
char *GetUNLVText();
|
||||
|
||||
/**
|
||||
* Detect the orientation of the input image and apparent script (alphabet).
|
||||
* orient_deg is the detected clockwise rotation of the input image in degrees
|
||||
* (0, 90, 180, 270)
|
||||
* orient_conf is the confidence (15.0 is reasonably confident)
|
||||
* script_name is an ASCII string, the name of the script, e.g. "Latin"
|
||||
* script_conf is confidence level in the script
|
||||
* Returns true on success and writes values to each parameter as an output
|
||||
*/
|
||||
bool DetectOrientationScript(int *orient_deg, float *orient_conf,
|
||||
const char **script_name, float *script_conf);
|
||||
|
||||
/**
|
||||
* The recognized text is returned as a char* which is coded
|
||||
* as UTF8 and must be freed with the delete [] operator.
|
||||
* page_number is a 0-based page index that will appear in the osd file.
|
||||
*/
|
||||
char *GetOsdText(int page_number);
|
||||
|
||||
/** Returns the (average) confidence value between 0 and 100. */
|
||||
int MeanTextConf();
|
||||
/**
|
||||
* Returns all word confidences (between 0 and 100) in an array, terminated
|
||||
* by -1. The calling function must delete [] after use.
|
||||
* The number of confidences should correspond to the number of space-
|
||||
* delimited words in GetUTF8Text.
|
||||
*/
|
||||
int *AllWordConfidences();
|
||||
|
||||
#ifndef DISABLED_LEGACY_ENGINE
|
||||
/**
|
||||
* Applies the given word to the adaptive classifier if possible.
|
||||
* The word must be SPACE-DELIMITED UTF-8 - l i k e t h i s , so it can
|
||||
* tell the boundaries of the graphemes.
|
||||
* Assumes that SetImage/SetRectangle have been used to set the image
|
||||
* to the given word. The mode arg should be PSM_SINGLE_WORD or
|
||||
* PSM_CIRCLE_WORD, as that will be used to control layout analysis.
|
||||
* The currently set PageSegMode is preserved.
|
||||
* Returns false if adaption was not possible for some reason.
|
||||
*/
|
||||
bool AdaptToWordStr(PageSegMode mode, const char *wordstr);
|
||||
#endif // ndef DISABLED_LEGACY_ENGINE
|
||||
|
||||
/**
|
||||
* Free up recognition results and any stored image data, without actually
|
||||
* freeing any recognition data that would be time-consuming to reload.
|
||||
* Afterwards, you must call SetImage or TesseractRect before doing
|
||||
* any Recognize or Get* operation.
|
||||
*/
|
||||
void Clear();
|
||||
|
||||
/**
|
||||
* Close down tesseract and free up all memory. End() is equivalent to
|
||||
* destructing and reconstructing your TessBaseAPI.
|
||||
* Once End() has been used, none of the other API functions may be used
|
||||
* other than Init and anything declared above it in the class definition.
|
||||
*/
|
||||
void End();
|
||||
|
||||
/**
|
||||
* Clear any library-level memory caches.
|
||||
* There are a variety of expensive-to-load constant data structures (mostly
|
||||
* language dictionaries) that are cached globally -- surviving the Init()
|
||||
* and End() of individual TessBaseAPI's. This function allows the clearing
|
||||
* of these caches.
|
||||
**/
|
||||
static void ClearPersistentCache();
|
||||
|
||||
/**
|
||||
* Check whether a word is valid according to Tesseract's language model
|
||||
* @return 0 if the word is invalid, non-zero if valid.
|
||||
* @warning temporary! This function will be removed from here and placed
|
||||
* in a separate API at some future time.
|
||||
*/
|
||||
int IsValidWord(const char *word) const;
|
||||
// Returns true if utf8_character is defined in the UniCharset.
|
||||
bool IsValidCharacter(const char *utf8_character) const;
|
||||
|
||||
bool GetTextDirection(int *out_offset, float *out_slope);
|
||||
|
||||
/** Sets Dict::letter_is_okay_ function to point to the given function. */
|
||||
void SetDictFunc(DictFunc f);
|
||||
|
||||
/** Sets Dict::probability_in_context_ function to point to the given
|
||||
* function.
|
||||
*/
|
||||
void SetProbabilityInContextFunc(ProbabilityInContextFunc f);
|
||||
|
||||
/**
|
||||
* Estimates the Orientation And Script of the image.
|
||||
* @return true if the image was processed successfully.
|
||||
*/
|
||||
bool DetectOS(OSResults *);
|
||||
|
||||
/**
|
||||
* Return text orientation of each block as determined by an earlier run
|
||||
* of layout analysis.
|
||||
*/
|
||||
void GetBlockTextOrientations(int **block_orientation,
|
||||
bool **vertical_writing);
|
||||
|
||||
/** This method returns the string form of the specified unichar. */
|
||||
const char *GetUnichar(int unichar_id) const;
|
||||
|
||||
/** Return the pointer to the i-th dawg loaded into tesseract_ object. */
|
||||
const Dawg *GetDawg(int i) const;
|
||||
|
||||
/** Return the number of dawgs loaded into tesseract_ object. */
|
||||
int NumDawgs() const;
|
||||
|
||||
/** Return the tesseract_ pointer, i.e. the underlying data object. */
Tesseract *tesseract() const {
|
||||
return tesseract_;
|
||||
}
|
||||
|
||||
/** Return last_oem_requested_, the most recently requested OcrEngineMode. */
OcrEngineMode oem() const {
|
||||
return last_oem_requested_;
|
||||
}
|
||||
|
||||
void set_min_orientation_margin(double margin);
|
||||
/* @} */
|
||||
|
||||
protected:
|
||||
/** Common code for setting the image. Returns true if Init has been called.
|
||||
*/
|
||||
bool InternalSetImage();
|
||||
|
||||
/**
|
||||
* Run the thresholder to make the thresholded image. If pix is not nullptr,
|
||||
* the source is thresholded to pix instead of the internal IMAGE.
|
||||
*/
|
||||
virtual bool Threshold(Pix **pix);
|
||||
|
||||
/**
|
||||
* Find lines from the image making the BLOCK_LIST.
|
||||
* @return 0 on success.
|
||||
*/
|
||||
int FindLines();
|
||||
|
||||
/** Delete the pageres and block list ready for a new page. */
|
||||
void ClearResults();
|
||||
|
||||
/**
|
||||
* Return an LTR Result Iterator -- used only for training, as we really want
|
||||
* to ignore all BiDi smarts at that point.
|
||||
* delete once you're done with it.
|
||||
*/
|
||||
LTRResultIterator *GetLTRIterator();
|
||||
|
||||
/**
|
||||
* Return the length of the output text string, as UTF8, assuming
|
||||
* one newline per line and one per block, with a terminator,
|
||||
* and assuming a single character reject marker for each rejected character.
|
||||
* Also return the number of recognized blobs in blob_count.
|
||||
*/
|
||||
int TextLength(int *blob_count) const;
|
||||
|
||||
//// paragraphs.cpp ////////////////////////////////////////////////////
|
||||
void DetectParagraphs(bool after_text_recognition);
|
||||
|
||||
/** Return page_res_ (the page-level data) as a pointer-to-const. */
const PAGE_RES *GetPageRes() const {
|
||||
return page_res_;
|
||||
}
|
||||
|
||||
protected:
|
||||
Tesseract *tesseract_; ///< The underlying data object.
|
||||
Tesseract *osd_tesseract_; ///< For orientation & script detection.
|
||||
EquationDetect *equ_detect_; ///< The equation detector.
|
||||
FileReader reader_; ///< Reads files from any filesystem.
|
||||
ImageThresholder *thresholder_; ///< Image thresholding module.
|
||||
std::vector<ParagraphModel *> *paragraph_models_;
|
||||
BLOCK_LIST *block_list_; ///< The page layout.
|
||||
PAGE_RES *page_res_; ///< The page-level data.
|
||||
std::string input_file_; ///< Name used by training code.
|
||||
std::string output_file_; ///< Name used by debug code.
|
||||
std::string datapath_; ///< Current location of tessdata.
|
||||
std::string language_; ///< Last initialized language.
|
||||
OcrEngineMode last_oem_requested_; ///< Last ocr language mode requested.
|
||||
bool recognition_done_; ///< page_res_ contains recognition data.
|
||||
|
||||
/**
|
||||
* @defgroup ThresholderParams Thresholder Parameters
|
||||
* Parameters saved from the Thresholder. Needed to rebuild coordinates.
|
||||
*/
|
||||
/* @{ */
|
||||
int rect_left_;
|
||||
int rect_top_;
|
||||
int rect_width_;
|
||||
int rect_height_;
|
||||
int image_width_;
|
||||
int image_height_;
|
||||
/* @} */
|
||||
|
||||
private:
|
||||
// A list of image filenames gets special consideration
|
||||
bool ProcessPagesFileList(FILE *fp, std::string *buf,
|
||||
const char *retry_config, int timeout_millisec,
|
||||
TessResultRenderer *renderer,
|
||||
int tessedit_page_number);
|
||||
// TIFF supports multipage so gets special consideration.
|
||||
bool ProcessPagesMultipageTiff(const unsigned char *data, size_t size,
|
||||
const char *filename, const char *retry_config,
|
||||
int timeout_millisec,
|
||||
TessResultRenderer *renderer,
|
||||
int tessedit_page_number);
|
||||
}; // class TessBaseAPI.
|
||||
|
||||
/** Escape a char string - replace &<>"' with HTML codes. */
|
||||
std::string HOcrEscape(const char *text);
|
||||
|
||||
} // namespace tesseract
|
||||
|
||||
#endif // TESSERACT_API_BASEAPI_H_
|
|
@ -1,484 +0,0 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: capi.h
|
||||
// Description: C-API TessBaseAPI
|
||||
//
|
||||
// (C) Copyright 2012, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef API_CAPI_H_
|
||||
#define API_CAPI_H_
|
||||
|
||||
#include "export.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
# include <tesseract/baseapi.h>
|
||||
# include <tesseract/ocrclass.h>
|
||||
# include <tesseract/pageiterator.h>
|
||||
# include <tesseract/renderer.h>
|
||||
# include <tesseract/resultiterator.h>
|
||||
#endif
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#ifndef BOOL
|
||||
# define BOOL int
|
||||
# define TRUE 1
|
||||
# define FALSE 0
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
typedef tesseract::TessResultRenderer TessResultRenderer;
|
||||
typedef tesseract::TessBaseAPI TessBaseAPI;
|
||||
typedef tesseract::PageIterator TessPageIterator;
|
||||
typedef tesseract::ResultIterator TessResultIterator;
|
||||
typedef tesseract::MutableIterator TessMutableIterator;
|
||||
typedef tesseract::ChoiceIterator TessChoiceIterator;
|
||||
typedef tesseract::OcrEngineMode TessOcrEngineMode;
|
||||
typedef tesseract::PageSegMode TessPageSegMode;
|
||||
typedef tesseract::PageIteratorLevel TessPageIteratorLevel;
|
||||
typedef tesseract::Orientation TessOrientation;
|
||||
typedef tesseract::ParagraphJustification TessParagraphJustification;
|
||||
typedef tesseract::WritingDirection TessWritingDirection;
|
||||
typedef tesseract::TextlineOrder TessTextlineOrder;
|
||||
typedef tesseract::PolyBlockType TessPolyBlockType;
|
||||
typedef tesseract::ETEXT_DESC ETEXT_DESC;
|
||||
#else
|
||||
typedef struct TessResultRenderer TessResultRenderer;
|
||||
typedef struct TessBaseAPI TessBaseAPI;
|
||||
typedef struct TessPageIterator TessPageIterator;
|
||||
typedef struct TessResultIterator TessResultIterator;
|
||||
typedef struct TessMutableIterator TessMutableIterator;
|
||||
typedef struct TessChoiceIterator TessChoiceIterator;
|
||||
/*
* C-only declaration of TessOcrEngineMode: this sits in the #else branch of
* #ifdef __cplusplus, whereas C++ builds alias the name to
* tesseract::OcrEngineMode. Enumerators take implicit values starting at 0.
* NOTE(review): presumably the order must stay in sync with
* tesseract::OcrEngineMode - confirm against the C++ header.
*/
typedef enum TessOcrEngineMode {
|
||||
OEM_TESSERACT_ONLY,
|
||||
OEM_LSTM_ONLY,
|
||||
OEM_TESSERACT_LSTM_COMBINED,
|
||||
OEM_DEFAULT
|
||||
} TessOcrEngineMode;
|
||||
/*
* C-only declaration of TessPageSegMode: this sits in the #else branch of
* #ifdef __cplusplus, whereas C++ builds alias the name to
* tesseract::PageSegMode. Enumerators take implicit values starting at 0, so
* the trailing PSM_COUNT equals the number of modes listed before it (14).
* NOTE(review): presumably the order must stay in sync with
* tesseract::PageSegMode - confirm against the C++ header.
*/
typedef enum TessPageSegMode {
|
||||
PSM_OSD_ONLY,
|
||||
PSM_AUTO_OSD,
|
||||
PSM_AUTO_ONLY,
|
||||
PSM_AUTO,
|
||||
PSM_SINGLE_COLUMN,
|
||||
PSM_SINGLE_BLOCK_VERT_TEXT,
|
||||
PSM_SINGLE_BLOCK,
|
||||
PSM_SINGLE_LINE,
|
||||
PSM_SINGLE_WORD,
|
||||
PSM_CIRCLE_WORD,
|
||||
PSM_SINGLE_CHAR,
|
||||
PSM_SPARSE_TEXT,
|
||||
PSM_SPARSE_TEXT_OSD,
|
||||
PSM_RAW_LINE,
|
||||
PSM_COUNT
|
||||
} TessPageSegMode;
|
||||
/*
* C-only declaration of TessPageIteratorLevel: this sits in the #else branch
* of #ifdef __cplusplus, whereas C++ builds alias the name to
* tesseract::PageIteratorLevel. Enumerators take implicit values starting
* at 0.
* NOTE(review): presumably the order must stay in sync with
* tesseract::PageIteratorLevel - confirm against the C++ header.
*/
typedef enum TessPageIteratorLevel {
|
||||
RIL_BLOCK,
|
||||
RIL_PARA,
|
||||
RIL_TEXTLINE,
|
||||
RIL_WORD,
|
||||
RIL_SYMBOL
|
||||
} TessPageIteratorLevel;
|
||||
/*
* C-only declaration of TessPolyBlockType: this sits in the #else branch of
* #ifdef __cplusplus, whereas C++ builds alias the name to
* tesseract::PolyBlockType. Enumerators take implicit values starting at 0,
* so the trailing PT_COUNT equals the number of types listed before it (15).
* NOTE(review): presumably the order must stay in sync with
* tesseract::PolyBlockType - confirm against the C++ header.
*/
typedef enum TessPolyBlockType {
|
||||
PT_UNKNOWN,
|
||||
PT_FLOWING_TEXT,
|
||||
PT_HEADING_TEXT,
|
||||
PT_PULLOUT_TEXT,
|
||||
PT_EQUATION,
|
||||
PT_INLINE_EQUATION,
|
||||
PT_TABLE,
|
||||
PT_VERTICAL_TEXT,
|
||||
PT_CAPTION_TEXT,
|
||||
PT_FLOWING_IMAGE,
|
||||
PT_HEADING_IMAGE,
|
||||
PT_PULLOUT_IMAGE,
|
||||
PT_HORZ_LINE,
|
||||
PT_VERT_LINE,
|
||||
PT_NOISE,
|
||||
PT_COUNT
|
||||
} TessPolyBlockType;
|
||||
/*
* C-only declaration of TessOrientation: this sits in the #else branch of
* #ifdef __cplusplus, whereas C++ builds alias the name to
* tesseract::Orientation. Enumerators take implicit values starting at 0.
* NOTE(review): presumably the order must stay in sync with
* tesseract::Orientation - confirm against the C++ header.
*/
typedef enum TessOrientation {
|
||||
ORIENTATION_PAGE_UP,
|
||||
ORIENTATION_PAGE_RIGHT,
|
||||
ORIENTATION_PAGE_DOWN,
|
||||
ORIENTATION_PAGE_LEFT
|
||||
} TessOrientation;
|
||||
/*
* C-only declaration of TessParagraphJustification: this sits in the #else
* branch of #ifdef __cplusplus, whereas C++ builds alias the name to
* tesseract::ParagraphJustification. Enumerators take implicit values
* starting at 0.
* NOTE(review): presumably the order must stay in sync with
* tesseract::ParagraphJustification - confirm against the C++ header.
*/
typedef enum TessParagraphJustification {
|
||||
JUSTIFICATION_UNKNOWN,
|
||||
JUSTIFICATION_LEFT,
|
||||
JUSTIFICATION_CENTER,
|
||||
JUSTIFICATION_RIGHT
|
||||
} TessParagraphJustification;
|
||||
/*
* C-only declaration of TessWritingDirection: this sits in the #else branch
* of #ifdef __cplusplus, whereas C++ builds alias the name to
* tesseract::WritingDirection. Enumerators take implicit values starting
* at 0.
* NOTE(review): presumably the order must stay in sync with
* tesseract::WritingDirection - confirm against the C++ header.
*/
typedef enum TessWritingDirection {
|
||||
WRITING_DIRECTION_LEFT_TO_RIGHT,
|
||||
WRITING_DIRECTION_RIGHT_TO_LEFT,
|
||||
WRITING_DIRECTION_TOP_TO_BOTTOM
|
||||
} TessWritingDirection;
|
||||
/*
* C-only declaration of TessTextlineOrder: this sits in the #else branch of
* #ifdef __cplusplus, whereas C++ builds alias the name to
* tesseract::TextlineOrder. Enumerators take implicit values starting at 0.
* NOTE(review): presumably the order must stay in sync with
* tesseract::TextlineOrder - confirm against the C++ header.
*/
typedef enum TessTextlineOrder {
|
||||
TEXTLINE_ORDER_LEFT_TO_RIGHT,
|
||||
TEXTLINE_ORDER_RIGHT_TO_LEFT,
|
||||
TEXTLINE_ORDER_TOP_TO_BOTTOM
|
||||
} TessTextlineOrder;
|
||||
typedef struct ETEXT_DESC ETEXT_DESC;
|
||||
#endif
|
||||
|
||||
typedef bool (*TessCancelFunc)(void *cancel_this, int words);
|
||||
typedef bool (*TessProgressFunc)(ETEXT_DESC *ths, int left, int right, int top,
|
||||
int bottom);
|
||||
|
||||
struct Pix;
|
||||
struct Boxa;
|
||||
struct Pixa;
|
||||
|
||||
/* General free functions */
|
||||
|
||||
TESS_API const char *TessVersion();
|
||||
TESS_API void TessDeleteText(const char *text);
|
||||
TESS_API void TessDeleteTextArray(char **arr);
|
||||
TESS_API void TessDeleteIntArray(const int *arr);
|
||||
|
||||
/* Renderer API */
|
||||
TESS_API TessResultRenderer *TessTextRendererCreate(const char *outputbase);
|
||||
TESS_API TessResultRenderer *TessHOcrRendererCreate(const char *outputbase);
|
||||
TESS_API TessResultRenderer *TessHOcrRendererCreate2(const char *outputbase,
|
||||
BOOL font_info);
|
||||
TESS_API TessResultRenderer *TessAltoRendererCreate(const char *outputbase);
|
||||
TESS_API TessResultRenderer *TessTsvRendererCreate(const char *outputbase);
|
||||
TESS_API TessResultRenderer *TessPDFRendererCreate(const char *outputbase,
|
||||
const char *datadir,
|
||||
BOOL textonly);
|
||||
TESS_API TessResultRenderer *TessUnlvRendererCreate(const char *outputbase);
|
||||
TESS_API TessResultRenderer *TessBoxTextRendererCreate(const char *outputbase);
|
||||
TESS_API TessResultRenderer *TessLSTMBoxRendererCreate(const char *outputbase);
|
||||
TESS_API TessResultRenderer *TessWordStrBoxRendererCreate(
|
||||
const char *outputbase);
|
||||
|
||||
TESS_API void TessDeleteResultRenderer(TessResultRenderer *renderer);
|
||||
TESS_API void TessResultRendererInsert(TessResultRenderer *renderer,
|
||||
TessResultRenderer *next);
|
||||
TESS_API TessResultRenderer *TessResultRendererNext(
|
||||
TessResultRenderer *renderer);
|
||||
TESS_API BOOL TessResultRendererBeginDocument(TessResultRenderer *renderer,
|
||||
const char *title);
|
||||
TESS_API BOOL TessResultRendererAddImage(TessResultRenderer *renderer,
|
||||
TessBaseAPI *api);
|
||||
TESS_API BOOL TessResultRendererEndDocument(TessResultRenderer *renderer);
|
||||
|
||||
TESS_API const char *TessResultRendererExtention(TessResultRenderer *renderer);
|
||||
TESS_API const char *TessResultRendererTitle(TessResultRenderer *renderer);
|
||||
TESS_API int TessResultRendererImageNum(TessResultRenderer *renderer);
|
||||
|
||||
/* Base API */
|
||||
|
||||
TESS_API TessBaseAPI *TessBaseAPICreate();
|
||||
TESS_API void TessBaseAPIDelete(TessBaseAPI *handle);
|
||||
|
||||
TESS_API size_t TessBaseAPIGetOpenCLDevice(TessBaseAPI *handle, void **device);
|
||||
|
||||
TESS_API void TessBaseAPISetInputName(TessBaseAPI *handle, const char *name);
|
||||
TESS_API const char *TessBaseAPIGetInputName(TessBaseAPI *handle);
|
||||
|
||||
TESS_API void TessBaseAPISetInputImage(TessBaseAPI *handle, struct Pix *pix);
|
||||
TESS_API struct Pix *TessBaseAPIGetInputImage(TessBaseAPI *handle);
|
||||
|
||||
TESS_API int TessBaseAPIGetSourceYResolution(TessBaseAPI *handle);
|
||||
TESS_API const char *TessBaseAPIGetDatapath(TessBaseAPI *handle);
|
||||
|
||||
TESS_API void TessBaseAPISetOutputName(TessBaseAPI *handle, const char *name);
|
||||
|
||||
TESS_API BOOL TessBaseAPISetVariable(TessBaseAPI *handle, const char *name,
|
||||
const char *value);
|
||||
TESS_API BOOL TessBaseAPISetDebugVariable(TessBaseAPI *handle, const char *name,
|
||||
const char *value);
|
||||
|
||||
TESS_API BOOL TessBaseAPIGetIntVariable(const TessBaseAPI *handle,
|
||||
const char *name, int *value);
|
||||
TESS_API BOOL TessBaseAPIGetBoolVariable(const TessBaseAPI *handle,
|
||||
const char *name, BOOL *value);
|
||||
TESS_API BOOL TessBaseAPIGetDoubleVariable(const TessBaseAPI *handle,
|
||||
const char *name, double *value);
|
||||
TESS_API const char *TessBaseAPIGetStringVariable(const TessBaseAPI *handle,
|
||||
const char *name);
|
||||
|
||||
TESS_API void TessBaseAPIPrintVariables(const TessBaseAPI *handle, FILE *fp);
|
||||
TESS_API BOOL TessBaseAPIPrintVariablesToFile(const TessBaseAPI *handle,
|
||||
const char *filename);
|
||||
|
||||
TESS_API int TessBaseAPIInit1(TessBaseAPI *handle, const char *datapath,
|
||||
const char *language, TessOcrEngineMode oem,
|
||||
char **configs, int configs_size);
|
||||
TESS_API int TessBaseAPIInit2(TessBaseAPI *handle, const char *datapath,
|
||||
const char *language, TessOcrEngineMode oem);
|
||||
TESS_API int TessBaseAPIInit3(TessBaseAPI *handle, const char *datapath,
|
||||
const char *language);
|
||||
|
||||
TESS_API int TessBaseAPIInit4(TessBaseAPI *handle, const char *datapath,
|
||||
const char *language, TessOcrEngineMode mode,
|
||||
char **configs, int configs_size, char **vars_vec,
|
||||
char **vars_values, size_t vars_vec_size,
|
||||
BOOL set_only_non_debug_params);
|
||||
|
||||
TESS_API int TessBaseAPIInit5(TessBaseAPI *handle, const char *data, int data_size,
|
||||
const char *language, TessOcrEngineMode mode,
|
||||
char **configs, int configs_size, char **vars_vec,
|
||||
char **vars_values, size_t vars_vec_size,
|
||||
BOOL set_only_non_debug_params);
|
||||
|
||||
TESS_API const char *TessBaseAPIGetInitLanguagesAsString(
|
||||
const TessBaseAPI *handle);
|
||||
TESS_API char **TessBaseAPIGetLoadedLanguagesAsVector(
|
||||
const TessBaseAPI *handle);
|
||||
TESS_API char **TessBaseAPIGetAvailableLanguagesAsVector(
|
||||
const TessBaseAPI *handle);
|
||||
|
||||
TESS_API void TessBaseAPIInitForAnalysePage(TessBaseAPI *handle);
|
||||
|
||||
TESS_API void TessBaseAPIReadConfigFile(TessBaseAPI *handle,
|
||||
const char *filename);
|
||||
TESS_API void TessBaseAPIReadDebugConfigFile(TessBaseAPI *handle,
|
||||
const char *filename);
|
||||
|
||||
TESS_API void TessBaseAPISetPageSegMode(TessBaseAPI *handle,
|
||||
TessPageSegMode mode);
|
||||
TESS_API TessPageSegMode TessBaseAPIGetPageSegMode(const TessBaseAPI *handle);
|
||||
|
||||
TESS_API char *TessBaseAPIRect(TessBaseAPI *handle,
|
||||
const unsigned char *imagedata,
|
||||
int bytes_per_pixel, int bytes_per_line,
|
||||
int left, int top, int width, int height);
|
||||
|
||||
TESS_API void TessBaseAPIClearAdaptiveClassifier(TessBaseAPI *handle);
|
||||
|
||||
TESS_API void TessBaseAPISetImage(TessBaseAPI *handle,
|
||||
const unsigned char *imagedata, int width,
|
||||
int height, int bytes_per_pixel,
|
||||
int bytes_per_line);
|
||||
TESS_API void TessBaseAPISetImage2(TessBaseAPI *handle, struct Pix *pix);
|
||||
|
||||
TESS_API void TessBaseAPISetSourceResolution(TessBaseAPI *handle, int ppi);
|
||||
|
||||
TESS_API void TessBaseAPISetRectangle(TessBaseAPI *handle, int left, int top,
|
||||
int width, int height);
|
||||
|
||||
TESS_API struct Pix *TessBaseAPIGetThresholdedImage(TessBaseAPI *handle);
|
||||
TESS_API struct Boxa *TessBaseAPIGetRegions(TessBaseAPI *handle,
|
||||
struct Pixa **pixa);
|
||||
TESS_API struct Boxa *TessBaseAPIGetTextlines(TessBaseAPI *handle,
|
||||
struct Pixa **pixa,
|
||||
int **blockids);
|
||||
TESS_API struct Boxa *TessBaseAPIGetTextlines1(TessBaseAPI *handle,
|
||||
BOOL raw_image, int raw_padding,
|
||||
struct Pixa **pixa,
|
||||
int **blockids, int **paraids);
|
||||
TESS_API struct Boxa *TessBaseAPIGetStrips(TessBaseAPI *handle,
|
||||
struct Pixa **pixa, int **blockids);
|
||||
TESS_API struct Boxa *TessBaseAPIGetWords(TessBaseAPI *handle,
|
||||
struct Pixa **pixa);
|
||||
TESS_API struct Boxa *TessBaseAPIGetConnectedComponents(TessBaseAPI *handle,
|
||||
struct Pixa **cc);
|
||||
TESS_API struct Boxa *TessBaseAPIGetComponentImages(TessBaseAPI *handle,
|
||||
TessPageIteratorLevel level,
|
||||
BOOL text_only,
|
||||
struct Pixa **pixa,
|
||||
int **blockids);
|
||||
TESS_API struct Boxa *TessBaseAPIGetComponentImages1(
|
||||
TessBaseAPI *handle, TessPageIteratorLevel level, BOOL text_only,
|
||||
BOOL raw_image, int raw_padding, struct Pixa **pixa, int **blockids,
|
||||
int **paraids);
|
||||
|
||||
TESS_API int TessBaseAPIGetThresholdedImageScaleFactor(
|
||||
const TessBaseAPI *handle);
|
||||
|
||||
TESS_API TessPageIterator *TessBaseAPIAnalyseLayout(TessBaseAPI *handle);
|
||||
|
||||
TESS_API int TessBaseAPIRecognize(TessBaseAPI *handle, ETEXT_DESC *monitor);
|
||||
|
||||
TESS_API BOOL TessBaseAPIProcessPages(TessBaseAPI *handle, const char *filename,
|
||||
const char *retry_config,
|
||||
int timeout_millisec,
|
||||
TessResultRenderer *renderer);
|
||||
TESS_API BOOL TessBaseAPIProcessPage(TessBaseAPI *handle, struct Pix *pix,
|
||||
int page_index, const char *filename,
|
||||
const char *retry_config,
|
||||
int timeout_millisec,
|
||||
TessResultRenderer *renderer);
|
||||
|
||||
TESS_API TessResultIterator *TessBaseAPIGetIterator(TessBaseAPI *handle);
|
||||
TESS_API TessMutableIterator *TessBaseAPIGetMutableIterator(
|
||||
TessBaseAPI *handle);
|
||||
|
||||
TESS_API char *TessBaseAPIGetUTF8Text(TessBaseAPI *handle);
|
||||
TESS_API char *TessBaseAPIGetHOCRText(TessBaseAPI *handle, int page_number);
|
||||
|
||||
TESS_API char *TessBaseAPIGetAltoText(TessBaseAPI *handle, int page_number);
|
||||
TESS_API char *TessBaseAPIGetTsvText(TessBaseAPI *handle, int page_number);
|
||||
|
||||
TESS_API char *TessBaseAPIGetBoxText(TessBaseAPI *handle, int page_number);
|
||||
TESS_API char *TessBaseAPIGetLSTMBoxText(TessBaseAPI *handle, int page_number);
|
||||
TESS_API char *TessBaseAPIGetWordStrBoxText(TessBaseAPI *handle,
|
||||
int page_number);
|
||||
|
||||
TESS_API char *TessBaseAPIGetUNLVText(TessBaseAPI *handle);
|
||||
TESS_API int TessBaseAPIMeanTextConf(TessBaseAPI *handle);
|
||||
|
||||
TESS_API int *TessBaseAPIAllWordConfidences(TessBaseAPI *handle);
|
||||
|
||||
#ifndef DISABLED_LEGACY_ENGINE
|
||||
TESS_API BOOL TessBaseAPIAdaptToWordStr(TessBaseAPI *handle,
|
||||
TessPageSegMode mode,
|
||||
const char *wordstr);
|
||||
#endif // #ifndef DISABLED_LEGACY_ENGINE
|
||||
|
||||
TESS_API void TessBaseAPIClear(TessBaseAPI *handle);
|
||||
TESS_API void TessBaseAPIEnd(TessBaseAPI *handle);
|
||||
|
||||
TESS_API int TessBaseAPIIsValidWord(TessBaseAPI *handle, const char *word);
|
||||
TESS_API BOOL TessBaseAPIGetTextDirection(TessBaseAPI *handle, int *out_offset,
|
||||
float *out_slope);
|
||||
|
||||
TESS_API const char *TessBaseAPIGetUnichar(TessBaseAPI *handle, int unichar_id);
|
||||
|
||||
TESS_API void TessBaseAPIClearPersistentCache(TessBaseAPI *handle);
|
||||
|
||||
#ifndef DISABLED_LEGACY_ENGINE
|
||||
|
||||
// Call TessDeleteText(*best_script_name) to free memory allocated by this
|
||||
// function
|
||||
TESS_API BOOL TessBaseAPIDetectOrientationScript(TessBaseAPI *handle,
|
||||
int *orient_deg,
|
||||
float *orient_conf,
|
||||
const char **script_name,
|
||||
float *script_conf);
|
||||
#endif // #ifndef DISABLED_LEGACY_ENGINE
|
||||
|
||||
TESS_API void TessBaseAPISetMinOrientationMargin(TessBaseAPI *handle,
|
||||
double margin);
|
||||
|
||||
TESS_API int TessBaseAPINumDawgs(const TessBaseAPI *handle);
|
||||
|
||||
TESS_API TessOcrEngineMode TessBaseAPIOem(const TessBaseAPI *handle);
|
||||
|
||||
TESS_API void TessBaseGetBlockTextOrientations(TessBaseAPI *handle,
|
||||
int **block_orientation,
|
||||
bool **vertical_writing);
|
||||
|
||||
/* Page iterator */
|
||||
|
||||
TESS_API void TessPageIteratorDelete(TessPageIterator *handle);
|
||||
|
||||
TESS_API TessPageIterator *TessPageIteratorCopy(const TessPageIterator *handle);
|
||||
|
||||
TESS_API void TessPageIteratorBegin(TessPageIterator *handle);
|
||||
|
||||
TESS_API BOOL TessPageIteratorNext(TessPageIterator *handle,
|
||||
TessPageIteratorLevel level);
|
||||
|
||||
TESS_API BOOL TessPageIteratorIsAtBeginningOf(const TessPageIterator *handle,
|
||||
TessPageIteratorLevel level);
|
||||
|
||||
TESS_API BOOL TessPageIteratorIsAtFinalElement(const TessPageIterator *handle,
|
||||
TessPageIteratorLevel level,
|
||||
TessPageIteratorLevel element);
|
||||
|
||||
TESS_API BOOL TessPageIteratorBoundingBox(const TessPageIterator *handle,
|
||||
TessPageIteratorLevel level,
|
||||
int *left, int *top, int *right,
|
||||
int *bottom);
|
||||
|
||||
TESS_API TessPolyBlockType
|
||||
TessPageIteratorBlockType(const TessPageIterator *handle);
|
||||
|
||||
TESS_API struct Pix *TessPageIteratorGetBinaryImage(
|
||||
const TessPageIterator *handle, TessPageIteratorLevel level);
|
||||
|
||||
TESS_API struct Pix *TessPageIteratorGetImage(const TessPageIterator *handle,
|
||||
TessPageIteratorLevel level,
|
||||
int padding,
|
||||
struct Pix *original_image,
|
||||
int *left, int *top);
|
||||
|
||||
TESS_API BOOL TessPageIteratorBaseline(const TessPageIterator *handle,
|
||||
TessPageIteratorLevel level, int *x1,
|
||||
int *y1, int *x2, int *y2);
|
||||
|
||||
TESS_API void TessPageIteratorOrientation(
|
||||
TessPageIterator *handle, TessOrientation *orientation,
|
||||
TessWritingDirection *writing_direction, TessTextlineOrder *textline_order,
|
||||
float *deskew_angle);
|
||||
|
||||
TESS_API void TessPageIteratorParagraphInfo(
|
||||
TessPageIterator *handle, TessParagraphJustification *justification,
|
||||
BOOL *is_list_item, BOOL *is_crown, int *first_line_indent);
|
||||
|
||||
/* Result iterator */
|
||||
|
||||
TESS_API void TessResultIteratorDelete(TessResultIterator *handle);
|
||||
TESS_API TessResultIterator *TessResultIteratorCopy(
|
||||
const TessResultIterator *handle);
|
||||
TESS_API TessPageIterator *TessResultIteratorGetPageIterator(
|
||||
TessResultIterator *handle);
|
||||
TESS_API const TessPageIterator *TessResultIteratorGetPageIteratorConst(
|
||||
const TessResultIterator *handle);
|
||||
TESS_API TessChoiceIterator *TessResultIteratorGetChoiceIterator(
|
||||
const TessResultIterator *handle);
|
||||
|
||||
TESS_API BOOL TessResultIteratorNext(TessResultIterator *handle,
|
||||
TessPageIteratorLevel level);
|
||||
TESS_API char *TessResultIteratorGetUTF8Text(const TessResultIterator *handle,
|
||||
TessPageIteratorLevel level);
|
||||
TESS_API float TessResultIteratorConfidence(const TessResultIterator *handle,
|
||||
TessPageIteratorLevel level);
|
||||
TESS_API const char *TessResultIteratorWordRecognitionLanguage(
|
||||
const TessResultIterator *handle);
|
||||
TESS_API const char *TessResultIteratorWordFontAttributes(
|
||||
const TessResultIterator *handle, BOOL *is_bold, BOOL *is_italic,
|
||||
BOOL *is_underlined, BOOL *is_monospace, BOOL *is_serif, BOOL *is_smallcaps,
|
||||
int *pointsize, int *font_id);
|
||||
|
||||
TESS_API BOOL
|
||||
TessResultIteratorWordIsFromDictionary(const TessResultIterator *handle);
|
||||
TESS_API BOOL TessResultIteratorWordIsNumeric(const TessResultIterator *handle);
|
||||
TESS_API BOOL
|
||||
TessResultIteratorSymbolIsSuperscript(const TessResultIterator *handle);
|
||||
TESS_API BOOL
|
||||
TessResultIteratorSymbolIsSubscript(const TessResultIterator *handle);
|
||||
TESS_API BOOL
|
||||
TessResultIteratorSymbolIsDropcap(const TessResultIterator *handle);
|
||||
|
||||
TESS_API void TessChoiceIteratorDelete(TessChoiceIterator *handle);
|
||||
TESS_API BOOL TessChoiceIteratorNext(TessChoiceIterator *handle);
|
||||
TESS_API const char *TessChoiceIteratorGetUTF8Text(
|
||||
const TessChoiceIterator *handle);
|
||||
TESS_API float TessChoiceIteratorConfidence(const TessChoiceIterator *handle);
|
||||
|
||||
/* Progress monitor */
|
||||
|
||||
TESS_API ETEXT_DESC *TessMonitorCreate();
|
||||
TESS_API void TessMonitorDelete(ETEXT_DESC *monitor);
|
||||
TESS_API void TessMonitorSetCancelFunc(ETEXT_DESC *monitor,
|
||||
TessCancelFunc cancelFunc);
|
||||
TESS_API void TessMonitorSetCancelThis(ETEXT_DESC *monitor, void *cancelThis);
|
||||
TESS_API void *TessMonitorGetCancelThis(ETEXT_DESC *monitor);
|
||||
TESS_API void TessMonitorSetProgressFunc(ETEXT_DESC *monitor,
|
||||
TessProgressFunc progressFunc);
|
||||
TESS_API int TessMonitorGetProgress(ETEXT_DESC *monitor);
|
||||
TESS_API void TessMonitorSetDeadlineMSecs(ETEXT_DESC *monitor, int deadline);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // API_CAPI_H_
|
|
@ -1,37 +0,0 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: export.h
|
||||
// Description: Place holder
|
||||
//
|
||||
// (C) Copyright 2006, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef TESSERACT_PLATFORM_H_
|
||||
#define TESSERACT_PLATFORM_H_
|
||||
|
||||
// TESS_API is the decoration placed on exported classes and functions.
// It is only defined here when the build has not already defined it:
//   - Windows / Cygwin: __declspec(dllexport) when TESS_EXPORTS is defined,
//     __declspec(dllimport) when TESS_IMPORTS is defined, otherwise empty.
//   - Any other platform: __attribute__((visibility("default"))) when either
//     TESS_EXPORTS or TESS_IMPORTS is defined, otherwise empty.
#ifndef TESS_API
|
||||
// Windows and Cygwin builds.
# if defined(_WIN32) || defined(__CYGWIN__)
|
||||
# if defined(TESS_EXPORTS)
|
||||
# define TESS_API __declspec(dllexport)
|
||||
# elif defined(TESS_IMPORTS)
|
||||
# define TESS_API __declspec(dllimport)
|
||||
# else
|
||||
// Neither TESS_EXPORTS nor TESS_IMPORTS: no decoration.
# define TESS_API
|
||||
# endif
|
||||
# else
|
||||
// All other platforms.
# if defined(TESS_EXPORTS) || defined(TESS_IMPORTS)
|
||||
# define TESS_API __attribute__((visibility("default")))
|
||||
# else
|
||||
// Neither TESS_EXPORTS nor TESS_IMPORTS: no decoration.
# define TESS_API
|
||||
# endif
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#endif // TESSERACT_PLATFORM_H_
|
|
@ -1,235 +0,0 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: ltrresultiterator.h
|
||||
// Description: Iterator for tesseract results in strict left-to-right
|
||||
// order that avoids using tesseract internal data structures.
|
||||
// Author: Ray Smith
|
||||
//
|
||||
// (C) Copyright 2010, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H_
|
||||
#define TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H_
|
||||
|
||||
#include "export.h" // for TESS_API
|
||||
#include "pageiterator.h" // for PageIterator
|
||||
#include "publictypes.h" // for PageIteratorLevel
|
||||
#include "unichar.h" // for StrongScriptDirection
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
class BLOB_CHOICE_IT;
|
||||
class PAGE_RES;
|
||||
class WERD_RES;
|
||||
|
||||
class Tesseract;
|
||||
|
||||
// Class to iterate over tesseract results, providing access to all levels
|
||||
// of the page hierarchy, without including any tesseract headers or having
|
||||
// to handle any tesseract structures.
|
||||
// WARNING! This class points to data held within the TessBaseAPI class, and
|
||||
// therefore can only be used while the TessBaseAPI class still exists and
|
||||
// has not been subjected to a call of Init, SetImage, Recognize, Clear, End,
|
||||
// DetectOS, or anything else that changes the internal PAGE_RES.
|
||||
// See tesseract/publictypes.h for the definition of PageIteratorLevel.
|
||||
// See also base class PageIterator, which contains the bulk of the interface.
|
||||
// LTRResultIterator adds text-specific methods for access to OCR output.
|
||||
|
||||
class TESS_API LTRResultIterator : public PageIterator {
|
||||
// ChoiceIterator is granted access to the non-public members of this class.
friend class ChoiceIterator;
|
||||
|
||||
public:
|
||||
// page_res and tesseract come directly from the BaseAPI.
|
||||
// The rectangle parameters are copied indirectly from the Thresholder,
|
||||
// via the BaseAPI. They represent the coordinates of some rectangle in an
|
||||
// original image (in top-left-origin coordinates) and therefore the top-left
|
||||
// needs to be added to any output boxes in order to specify coordinates
|
||||
// in the original image. See TessBaseAPI::SetRectangle.
|
||||
// The scale and scaled_yres are in case the Thresholder scaled the image
|
||||
// rectangle prior to thresholding. Any coordinates in tesseract's image
|
||||
// must be divided by scale before adding (rect_left, rect_top).
|
||||
// The scaled_yres indicates the effective resolution of the binary image
|
||||
// that tesseract has been given by the Thresholder.
|
||||
// After the constructor, Begin has already been called.
|
||||
LTRResultIterator(PAGE_RES *page_res, Tesseract *tesseract, int scale,
|
||||
int scaled_yres, int rect_left, int rect_top,
|
||||
int rect_width, int rect_height);
|
||||
|
||||
// Destructor; overrides the virtual destructor of the PageIterator base.
~LTRResultIterator() override;
|
||||
|
||||
// LTRResultIterators may be copied! This makes it possible to iterate over
|
||||
// all the objects at a lower level, while maintaining an iterator to
|
||||
// objects at a higher level. These constructors DO NOT CALL Begin, so
|
||||
// iterations will continue from the location of src.
|
||||
// TODO: For now the copy constructor and operator= only need the base class
|
||||
// versions, but if new data members are added, don't forget to add them!
|
||||
|
||||
// ============= Moving around within the page ============.
|
||||
|
||||
// See PageIterator.
|
||||
|
||||
// ============= Accessing data ==============.
|
||||
|
||||
// Returns the null terminated UTF-8 encoded text string for the current
|
||||
// object at the given level. Use delete [] to free after use.
|
||||
char *GetUTF8Text(PageIteratorLevel level) const;
|
||||
|
||||
// Set the string inserted at the end of each text line. "\n" by default.
|
||||
void SetLineSeparator(const char *new_line);
|
||||
|
||||
// Set the string inserted at the end of each paragraph. "\n" by default.
|
||||
void SetParagraphSeparator(const char *new_para);
|
||||
|
||||
// Returns the mean confidence of the current object at the given level.
|
||||
// The number should be interpreted as a percent probability. (0.0f-100.0f)
|
||||
float Confidence(PageIteratorLevel level) const;
|
||||
|
||||
// ============= Functions that refer to words only ============.
|
||||
|
||||
// Returns the font attributes of the current word. If iterating at a higher
|
||||
// level object than words, eg textlines, then this will return the
|
||||
// attributes of the first word in that textline.
|
||||
// The actual return value is a string representing a font name. It points
|
||||
// to an internal table and SHOULD NOT BE DELETED. Lifespan is the same as
|
||||
// the iterator itself, ie rendered invalid by various members of
|
||||
// TessBaseAPI, including Init, SetImage, End or deleting the TessBaseAPI.
|
||||
// Pointsize is returned in printers points (1/72 inch.)
|
||||
const char *WordFontAttributes(bool *is_bold, bool *is_italic,
|
||||
bool *is_underlined, bool *is_monospace,
|
||||
bool *is_serif, bool *is_smallcaps,
|
||||
int *pointsize, int *font_id) const;
|
||||
|
||||
// Return the name of the language used to recognize this word.
|
||||
// On error, nullptr. Do not delete this pointer.
|
||||
const char *WordRecognitionLanguage() const;
|
||||
|
||||
// Return the overall directionality of this word.
|
||||
StrongScriptDirection WordDirection() const;
|
||||
|
||||
// Returns true if the current word was found in a dictionary.
|
||||
bool WordIsFromDictionary() const;
|
||||
|
||||
// Returns the number of blanks before the current word.
|
||||
int BlanksBeforeWord() const;
|
||||
|
||||
// Returns true if the current word is numeric.
|
||||
bool WordIsNumeric() const;
|
||||
|
||||
// Returns true if the word contains blamer information.
|
||||
bool HasBlamerInfo() const;
|
||||
|
||||
// Returns the pointer to ParamsTrainingBundle stored in the BlamerBundle
|
||||
// of the current word.
|
||||
const void *GetParamsTrainingBundle() const;
|
||||
|
||||
// Returns a pointer to the string with blamer information for this word.
|
||||
// Assumes that the word's blamer_bundle is not nullptr.
|
||||
const char *GetBlamerDebug() const;
|
||||
|
||||
// Returns a pointer to the string with misadaption information for this word.
|
||||
// Assumes that the word's blamer_bundle is not nullptr.
|
||||
const char *GetBlamerMisadaptionDebug() const;
|
||||
|
||||
// Returns true if a truth string was recorded for the current word.
|
||||
bool HasTruthString() const;
|
||||
|
||||
// Returns true if the given string is equivalent to the truth string for
|
||||
// the current word.
|
||||
bool EquivalentToTruth(const char *str) const;
|
||||
|
||||
// Returns a null terminated UTF-8 encoded truth string for the current word.
|
||||
// Use delete [] to free after use.
|
||||
char *WordTruthUTF8Text() const;
|
||||
|
||||
// Returns a null terminated UTF-8 encoded normalized OCR string for the
|
||||
// current word. Use delete [] to free after use.
|
||||
char *WordNormedUTF8Text() const;
|
||||
|
||||
// Returns a pointer to serialized choice lattice.
|
||||
// Fills lattice_size with the number of bytes in lattice data.
|
||||
const char *WordLattice(int *lattice_size) const;
|
||||
|
||||
// ============= Functions that refer to symbols only ============.
|
||||
|
||||
// Returns true if the current symbol is a superscript.
|
||||
// If iterating at a higher level object than symbols, eg words, then
|
||||
// this will return the attributes of the first symbol in that word.
|
||||
bool SymbolIsSuperscript() const;
|
||||
// Returns true if the current symbol is a subscript.
|
||||
// If iterating at a higher level object than symbols, eg words, then
|
||||
// this will return the attributes of the first symbol in that word.
|
||||
bool SymbolIsSubscript() const;
|
||||
// Returns true if the current symbol is a dropcap.
|
||||
// If iterating at a higher level object than symbols, eg words, then
|
||||
// this will return the attributes of the first symbol in that word.
|
||||
bool SymbolIsDropcap() const;
|
||||
|
||||
protected:
|
||||
// NOTE(review): presumably the strings installed by SetLineSeparator and
// SetParagraphSeparator respectively; the pointers are stored as-is, so
// ownership is not visible from this header - confirm in the implementation.
const char *line_separator_;
|
||||
const char *paragraph_separator_;
|
||||
};
|
||||
|
||||
// Class to iterate over the classifier choices for a single RIL_SYMBOL.
|
||||
class TESS_API ChoiceIterator {
|
||||
public:
|
||||
// Construction is from a LTRResultIterator that points to the symbol of
|
||||
// interest. The ChoiceIterator allows a one-shot iteration over the
|
||||
// choices for this symbol and after that it is useless.
|
||||
explicit ChoiceIterator(const LTRResultIterator &result_it);
|
||||
~ChoiceIterator();
|
||||
|
||||
// Moves to the next choice for the symbol and returns false if there
|
||||
// are none left.
|
||||
bool Next();
|
||||
|
||||
// ============= Accessing data ==============.
|
||||
|
||||
// Returns the null terminated UTF-8 encoded text string for the current
|
||||
// choice.
|
||||
// NOTE: Unlike LTRResultIterator::GetUTF8Text, the return points to an
|
||||
// internal structure and should NOT be delete[]ed to free after use.
|
||||
const char *GetUTF8Text() const;
|
||||
|
||||
// Returns the confidence of the current choice depending on the used language
|
||||
// data. If only LSTM traineddata is used the value range is 0.0f - 1.0f. All
|
||||
// choices for one symbol should roughly add up to 1.0f.
|
||||
// If only traineddata of the legacy engine is used, the number should be
|
||||
// interpreted as a percent probability. (0.0f-100.0f) In this case
|
||||
// probabilities won't add up to 100. Each one stands on its own.
|
||||
float Confidence() const;
|
||||
|
||||
// Returns a vector containing all timesteps, which belong to the currently
|
||||
// selected symbol. A timestep is a vector containing pairs of symbols and
|
||||
// floating point numbers. The number states the probability for the
|
||||
// corresponding symbol.
|
||||
std::vector<std::vector<std::pair<const char *, float>>> *Timesteps() const;
|
||||
|
||||
private:
|
||||
// Clears the remaining spaces out of the results and adapts the probabilities.
|
||||
void filterSpaces();
|
||||
// Pointer to the WERD_RES object owned by the API.
|
||||
WERD_RES *word_res_;
|
||||
// Iterator over the blob choices.
|
||||
BLOB_CHOICE_IT *choice_it_;
|
||||
// NOTE(review): presumably the (symbol, probability) choices used when LSTM
// data is present, and the current position within them; the pointer starts
// out as nullptr - confirm usage against the implementation.
std::vector<std::pair<const char *, float>> *LSTM_choices_ = nullptr;
|
||||
std::vector<std::pair<const char *, float>>::iterator LSTM_choice_it_;
|
||||
|
||||
// NOTE(review): presumably a timestep index for the current symbol - confirm.
const int *tstep_index_;
|
||||
// regulates the rating granularity
|
||||
double rating_coefficient_;
|
||||
// leading blanks
|
||||
int blanks_before_word_;
|
||||
// true when there is lstm engine related trained data
|
||||
bool oemLSTM_;
|
||||
};
|
||||
|
||||
} // namespace tesseract.
|
||||
|
||||
#endif // TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H_
|
|
@ -1,158 +0,0 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
/**********************************************************************
|
||||
* File: ocrclass.h
|
||||
* Description: Class definitions and constants for the OCR API.
|
||||
* Author: Hewlett-Packard Co
|
||||
*
|
||||
* (C) Copyright 1996, Hewlett-Packard Co.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
/**********************************************************************
|
||||
* This file contains typedefs for all the structures used by
|
||||
* the HP OCR interface.
|
||||
* The structures are designed to allow them to be used with any
|
||||
* structure alignment up to 8.
|
||||
**********************************************************************/
|
||||
|
||||
#ifndef CCUTIL_OCRCLASS_H_
|
||||
#define CCUTIL_OCRCLASS_H_
|
||||
|
||||
#include <chrono>
|
||||
#include <ctime>
|
||||
|
||||
namespace tesseract {

/**
 * EANYCODE_CHAR: one character of OCR output.
 *
 * The character code is interpreted in the character set of the current
 * font, and the output text is delivered as an array of these records.
 *
 * - Spaces and line endings are not emitted as characters of their own; they
 *   are encoded in the records of the neighbouring characters. The first
 *   character of a word carries a positive `blanks` value.
 * - Anything unknown should be set to the default listed next to the member.
 * - When only word bounds are known, every character takes the word's top and
 *   bottom, the first character gets the word's left, the last character gets
 *   the word's right, and all remaining lefts/rights are -1.
 * - When set, right and bottom equal left+width and top+height.
 * - Most members are taken straight from the arguments of ocr_append_char.
 * - `formatting` holds the enhancement parameter with the line-direction
 *   information packed into its top 3 bits, coded as
 *   0=RL char, 1=LR char, 2=DR NL, 3=UL NL, 4=DR Para, 5=UL Para,
 *   6=TB char, 7=BT char. API users only need to know that this coding is
 *   backwards compatible with the previous version.
 */
struct EANYCODE_CHAR {
  // From version 2.0 onwards char_code carries UTF8: an ASCII character
  // occupies a single record, while any other character is spread over two or
  // more records holding one UTF8 byte each, all sharing the same bounding
  // box. Programs that handle languages with different character sets must
  // deal with such extended characters, and *all* code has to be ready for
  // UTF8 coded characters such as bullets and fancy quotes.
  uint16_t char_code;  // the character itself
  int16_t left;        // left edge of the character (default -1)
  int16_t right;       // right edge of the character (default -1)
  int16_t top;         // top edge of the character (default -1)
  int16_t bottom;      // bottom edge of the character (default -1)
  int16_t font_index;  // which font (default 0)
  uint8_t confidence;  // 0 = perfect, 100 = reject (default 0/100)
  uint8_t point_size;  // size of the character, 72 = 1 inch (default 10)
  int8_t blanks;       // number of spaces before this character (default 1)
  uint8_t formatting;  // character formatting (default 0)
};

/**
 * ETEXT_DESC: header describing the output of the OCR engine.
 *
 * The same structure doubles as progress monitor and as final output header,
 * because it has to remain a valid progress monitor while the engine is still
 * writing its output to shared memory.
 *
 * - While recognition is in progress all buffer information is -1.
 * - Progress starts at 0 and grows to 100 during OCR, with no other
 *   constraint. The progress callback additionally receives the bounding box
 *   of the word currently being processed.
 * - On every progress callback the OCR engine must set ocr_alive to 1; the HP
 *   side sets it back to 0. Repeatedly failing to reset it to 1 means the
 *   OCR engine is dead.
 * - A non-null cancel function is called with the number of user words found;
 *   if it returns true the operation is cancelled.
 */
class ETEXT_DESC;

using CANCEL_FUNC = bool (*)(void *, int);
using PROGRESS_FUNC = bool (*)(int, int, int, int, int);
using PROGRESS_FUNC2 = bool (*)(ETEXT_DESC *, int, int, int, int);

class ETEXT_DESC {  // output header
public:
  /// Number of characters in this buffer (0).
  int16_t count{0};
  /// Percentage complete, increasing (0-100).
  /// The progress monitor covers word recognition only, not layout analysis.
  /// See Ray comment in https://github.com/tesseract-ocr/tesseract/pull/27
  int16_t progress{0};
  /// True if this is not the last buffer.
  int8_t more_to_come{0};
  /// Set to 1 by the OCR engine, reset to 0 by the HP side.
  volatile int8_t ocr_alive{0};
  /// Reserved for error codes.
  int8_t err_code{0};
  /// Returns true to cancel.
  CANCEL_FUNC cancel{nullptr};
  /// Called whenever progress increases.
  PROGRESS_FUNC progress_callback{nullptr};
  /// Monitor-aware progress callback.
  PROGRESS_FUNC2 progress_callback2;
  /// `this` pointer or other data handed to the cancel function.
  void *cancel_this{nullptr};
  /// Time to stop. Expected to be set only by a call to set_deadline_msecs().
  std::chrono::steady_clock::time_point end_time;
  /// Character data.
  EANYCODE_CHAR text[1]{};

  // Installs the default monitor-aware callback and leaves end_time at the
  // clock's epoch, which deadline_exceeded() treats as "no deadline set".
  ETEXT_DESC() : progress_callback2(&default_progress_func), end_time() {}

  // Moves the deadline to deadline_msecs milliseconds from now. Values that
  // are zero or negative leave the current end_time untouched.
  void set_deadline_msecs(int32_t deadline_msecs) {
    if (deadline_msecs <= 0) {
      return;
    }
    const auto budget = std::chrono::milliseconds(deadline_msecs);
    end_time = std::chrono::steady_clock::now() + budget;
  }

  // Returns true only when a deadline has been set and the steady clock is
  // already past it; returns false otherwise.
  bool deadline_exceeded() const {
    const bool deadline_set = end_time.time_since_epoch() !=
                              std::chrono::steady_clock::duration::zero();
    // The clock is only read once a deadline is known to exist.
    return deadline_set && std::chrono::steady_clock::now() > end_time;
  }

private:
  // Default for progress_callback2: forwards to the plain progress_callback
  // (passing the monitor's current progress followed by the box coordinates)
  // when one is installed, and reports true when there is none.
  static bool default_progress_func(ETEXT_DESC *monitor, int left, int right,
                                    int top, int bottom) {
    const PROGRESS_FUNC plain_callback = monitor->progress_callback;
    if (plain_callback == nullptr) {
      return true;
    }
    return (*plain_callback)(monitor->progress, left, right, top, bottom);
  }
};

} // namespace tesseract
|
||||
|
||||
#endif // CCUTIL_OCRCLASS_H_
|
|
@ -1,139 +0,0 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: osdetect.h
|
||||
// Description: Orientation and script detection.
|
||||
// Author: Samuel Charron
|
||||
// Ranjith Unnikrishnan
|
||||
//
|
||||
// (C) Copyright 2008, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef TESSERACT_CCMAIN_OSDETECT_H_
|
||||
#define TESSERACT_CCMAIN_OSDETECT_H_
|
||||
|
||||
#include "export.h" // for TESS_API
|
||||
|
||||
#include <vector> // for std::vector
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
class BLOBNBOX;
|
||||
class BLOBNBOX_CLIST;
|
||||
class BLOB_CHOICE_LIST;
|
||||
class TO_BLOCK_LIST;
|
||||
class UNICHARSET;
|
||||
|
||||
class Tesseract;
|
||||
|
||||
// Max number of scripts in ICU + "NULL" + Japanese and Korean + Fraktur,
// written out below as 116 + 1 + 2 + 1 in that same order.
|
||||
const int kMaxNumberOfScripts = 116 + 1 + 2 + 1;
|
||||
|
||||
// Holds one orientation id and one script id together with a confidence
// value for each.
// NOTE(review): presumably sconfidence belongs to script_id and oconfidence
// to orientation_id - confirm against the code that fills them in.
struct OSBestResult {
  int orientation_id;
  int script_id;
  float sconfidence;
  float oconfidence;

  // Every field starts out at zero.
  OSBestResult()
      : orientation_id{0},
        script_id{0},
        sconfidence{0.0f},
        oconfidence{0.0f} {}
};
|
||||
|
||||
struct OSResults {
|
||||
OSResults() : unicharset(nullptr) {
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
for (int j = 0; j < kMaxNumberOfScripts; ++j) {
|
||||
scripts_na[i][j] = 0;
|
||||
}
|
||||
orientations[i] = 0;
|
||||
}
|
||||
}
|
||||
void update_best_orientation();
|
||||
// Set the estimate of the orientation to the given id.
|
||||
void set_best_orientation(int orientation_id);
|
||||
// Update/Compute the best estimate of the script assuming the given
|
||||
// orientation id.
|
||||
void update_best_script(int orientation_id);
|
||||
// Return the index of the script with the highest score for this orientation.
|
||||
TESS_API int get_best_script(int orientation_id) const;
|
||||
// Accumulate scores with given OSResults instance and update the best script.
|
||||
void accumulate(const OSResults &osr);
|
||||
|
||||
// Print statistics.
|
||||
void print_scores(void) const;
|
||||
void print_scores(int orientation_id) const;
|
||||
|
||||
// Array holding scores for each orientation id [0,3].
|
||||
// Orientation ids [0..3] map to [0, 270, 180, 90] degree orientations of the
|
||||
// page respectively, where the values refer to the amount of clockwise
|
||||
// rotation to be applied to the page for the text to be upright and readable.
|
||||
float orientations[4];
|
||||
// Script confidence scores for each of 4 possible orientations.
|
||||
float scripts_na[4][kMaxNumberOfScripts];
|
||||
|
||||
UNICHARSET *unicharset;
|
||||
OSBestResult best_result;
|
||||
};
|
||||
|
||||
class OrientationDetector {
|
||||
public:
|
||||
OrientationDetector(const std::vector<int> *allowed_scripts,
|
||||
OSResults *results);
|
||||
bool detect_blob(BLOB_CHOICE_LIST *scores);
|
||||
int get_orientation();
|
||||
|
||||
private:
|
||||
OSResults *osr_;
|
||||
const std::vector<int> *allowed_scripts_;
|
||||
};
|
||||
|
||||
class ScriptDetector {
|
||||
public:
|
||||
ScriptDetector(const std::vector<int> *allowed_scripts, OSResults *osr,
|
||||
tesseract::Tesseract *tess);
|
||||
void detect_blob(BLOB_CHOICE_LIST *scores);
|
||||
bool must_stop(int orientation) const;
|
||||
|
||||
private:
|
||||
OSResults *osr_;
|
||||
static const char *korean_script_;
|
||||
static const char *japanese_script_;
|
||||
static const char *fraktur_script_;
|
||||
int korean_id_;
|
||||
int japanese_id_;
|
||||
int katakana_id_;
|
||||
int hiragana_id_;
|
||||
int han_id_;
|
||||
int hangul_id_;
|
||||
int latin_id_;
|
||||
int fraktur_id_;
|
||||
tesseract::Tesseract *tess_;
|
||||
const std::vector<int> *allowed_scripts_;
|
||||
};
|
||||
|
||||
int orientation_and_script_detection(const char *filename, OSResults *,
|
||||
tesseract::Tesseract *);
|
||||
|
||||
int os_detect(TO_BLOCK_LIST *port_blocks, OSResults *osr,
|
||||
tesseract::Tesseract *tess);
|
||||
|
||||
int os_detect_blobs(const std::vector<int> *allowed_scripts,
|
||||
BLOBNBOX_CLIST *blob_list, OSResults *osr,
|
||||
tesseract::Tesseract *tess);
|
||||
|
||||
bool os_detect_blob(BLOBNBOX *bbox, OrientationDetector *o, ScriptDetector *s,
|
||||
OSResults *, tesseract::Tesseract *tess);
|
||||
|
||||
// Helper method to convert an orientation index to its value in degrees.
|
||||
// The value represents the amount of clockwise rotation in degrees that must be
|
||||
// applied for the text to be upright (readable).
|
||||
TESS_API int OrientationIdToValue(const int &id);
|
||||
|
||||
} // namespace tesseract
|
||||
|
||||
#endif // TESSERACT_CCMAIN_OSDETECT_H_
|
|
@ -1,364 +0,0 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: pageiterator.h
|
||||
// Description: Iterator for tesseract page structure that avoids using
|
||||
// tesseract internal data structures.
|
||||
// Author: Ray Smith
|
||||
//
|
||||
// (C) Copyright 2010, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef TESSERACT_CCMAIN_PAGEITERATOR_H_
|
||||
#define TESSERACT_CCMAIN_PAGEITERATOR_H_
|
||||
|
||||
#include "export.h"
|
||||
#include "publictypes.h"
|
||||
|
||||
struct Pix;
|
||||
struct Pta;
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
struct BlamerBundle;
|
||||
class C_BLOB_IT;
|
||||
class PAGE_RES;
|
||||
class PAGE_RES_IT;
|
||||
class WERD;
|
||||
|
||||
class Tesseract;
|
||||
|
||||
/**
|
||||
* Class to iterate over tesseract page structure, providing access to all
|
||||
* levels of the page hierarchy, without including any tesseract headers or
|
||||
* having to handle any tesseract structures.
|
||||
* WARNING! This class points to data held within the TessBaseAPI class, and
|
||||
* therefore can only be used while the TessBaseAPI class still exists and
|
||||
* has not been subjected to a call of Init, SetImage, Recognize, Clear, End
|
||||
* DetectOS, or anything else that changes the internal PAGE_RES.
|
||||
* See tesseract/publictypes.h for the definition of PageIteratorLevel.
|
||||
* See also ResultIterator, derived from PageIterator, which adds in the
|
||||
* ability to access OCR output with text-specific methods.
|
||||
*/
|
||||
|
||||
class TESS_API PageIterator {
|
||||
public:
|
||||
/**
|
||||
* page_res and tesseract come directly from the BaseAPI.
|
||||
* The rectangle parameters are copied indirectly from the Thresholder,
|
||||
* via the BaseAPI. They represent the coordinates of some rectangle in an
|
||||
* original image (in top-left-origin coordinates) and therefore the top-left
|
||||
* needs to be added to any output boxes in order to specify coordinates
|
||||
* in the original image. See TessBaseAPI::SetRectangle.
|
||||
* The scale and scaled_yres are in case the Thresholder scaled the image
|
||||
* rectangle prior to thresholding. Any coordinates in tesseract's image
|
||||
* must be divided by scale before adding (rect_left, rect_top).
|
||||
* The scaled_yres indicates the effective resolution of the binary image
|
||||
* that tesseract has been given by the Thresholder.
|
||||
* After the constructor, Begin has already been called.
|
||||
*/
|
||||
PageIterator(PAGE_RES *page_res, Tesseract *tesseract, int scale,
|
||||
int scaled_yres, int rect_left, int rect_top, int rect_width,
|
||||
int rect_height);
|
||||
virtual ~PageIterator();
|
||||
|
||||
/**
|
||||
* Page/ResultIterators may be copied! This makes it possible to iterate over
|
||||
* all the objects at a lower level, while maintaining an iterator to
|
||||
* objects at a higher level. These constructors DO NOT CALL Begin, so
|
||||
* iterations will continue from the location of src.
|
||||
*/
|
||||
PageIterator(const PageIterator &src);
|
||||
const PageIterator &operator=(const PageIterator &src);
|
||||
|
||||
/** Are we positioned at the same location as other? */
|
||||
bool PositionedAtSameWord(const PAGE_RES_IT *other) const;
|
||||
|
||||
// ============= Moving around within the page ============.
|
||||
|
||||
/**
|
||||
* Moves the iterator to point to the start of the page to begin an
|
||||
* iteration.
|
||||
*/
|
||||
virtual void Begin();
|
||||
|
||||
/**
|
||||
* Moves the iterator to the beginning of the paragraph.
|
||||
* This class implements this functionality by moving it to the zero indexed
|
||||
* blob of the first (leftmost) word on the first row of the paragraph.
|
||||
*/
|
||||
virtual void RestartParagraph();
|
||||
|
||||
/**
|
||||
* Return whether this iterator points anywhere in the first textline of a
|
||||
* paragraph.
|
||||
*/
|
||||
bool IsWithinFirstTextlineOfParagraph() const;
|
||||
|
||||
/**
|
||||
* Moves the iterator to the beginning of the text line.
|
||||
* This class implements this functionality by moving it to the zero indexed
|
||||
* blob of the first (leftmost) word of the row.
|
||||
*/
|
||||
virtual void RestartRow();
|
||||
|
||||
/**
|
||||
* Moves to the start of the next object at the given level in the
|
||||
* page hierarchy, and returns false if the end of the page was reached.
|
||||
* NOTE that RIL_SYMBOL will skip non-text blocks, but all other
|
||||
* PageIteratorLevel level values will visit each non-text block once.
|
||||
* Think of non text blocks as containing a single para, with a single line,
|
||||
* with a single imaginary word.
|
||||
* Calls to Next with different levels may be freely intermixed.
|
||||
* This function iterates words in right-to-left scripts correctly, if
|
||||
* the appropriate language has been loaded into Tesseract.
|
||||
*/
|
||||
virtual bool Next(PageIteratorLevel level);
|
||||
|
||||
/**
|
||||
* Returns true if the iterator is at the start of an object at the given
|
||||
* level.
|
||||
*
|
||||
* For instance, suppose an iterator it is pointed to the first symbol of the
|
||||
* first word of the third line of the second paragraph of the first block in
|
||||
* a page, then:
|
||||
* it.IsAtBeginningOf(RIL_BLOCK) = false
|
||||
* it.IsAtBeginningOf(RIL_PARA) = false
|
||||
* it.IsAtBeginningOf(RIL_TEXTLINE) = true
|
||||
* it.IsAtBeginningOf(RIL_WORD) = true
|
||||
* it.IsAtBeginningOf(RIL_SYMBOL) = true
|
||||
*/
|
||||
virtual bool IsAtBeginningOf(PageIteratorLevel level) const;
|
||||
|
||||
/**
|
||||
* Returns whether the iterator is positioned at the last element in a
|
||||
* given level. (e.g. the last word in a line, the last line in a block)
|
||||
*
|
||||
* Here's some two-paragraph example
|
||||
* text. It starts off innocuously
|
||||
* enough but quickly turns bizarre.
|
||||
* The author inserts a cornucopia
|
||||
* of words to guard against confused
|
||||
* references.
|
||||
*
|
||||
* Now take an iterator it pointed to the start of "bizarre."
|
||||
* it.IsAtFinalElement(RIL_PARA, RIL_SYMBOL) = false
|
||||
* it.IsAtFinalElement(RIL_PARA, RIL_WORD) = true
|
||||
* it.IsAtFinalElement(RIL_BLOCK, RIL_WORD) = false
|
||||
*/
|
||||
virtual bool IsAtFinalElement(PageIteratorLevel level,
|
||||
PageIteratorLevel element) const;
|
||||
|
||||
/**
|
||||
* Returns whether this iterator is positioned
|
||||
* before other: -1
|
||||
* equal to other: 0
|
||||
* after other: 1
|
||||
*/
|
||||
int Cmp(const PageIterator &other) const;
|
||||
|
||||
// ============= Accessing data ==============.
|
||||
// Coordinate system:
|
||||
// Integer coordinates are at the cracks between the pixels.
|
||||
// The top-left corner of the top-left pixel in the image is at (0,0).
|
||||
// The bottom-right corner of the bottom-right pixel in the image is at
|
||||
// (width, height).
|
||||
// Every bounding box goes from the top-left of the top-left contained
|
||||
// pixel to the bottom-right of the bottom-right contained pixel, so
|
||||
// the bounding box of the single top-left pixel in the image is:
|
||||
// (0,0)->(1,1).
|
||||
// If an image rectangle has been set in the API, then returned coordinates
|
||||
// relate to the original (full) image, rather than the rectangle.
|
||||
|
||||
/**
|
||||
* Controls what to include in a bounding box. Bounding boxes of all levels
|
||||
* between RIL_WORD and RIL_BLOCK can include or exclude potential diacritics.
|
||||
* Between layout analysis and recognition, it isn't known where all
|
||||
* diacritics belong, so this control is used to include or exclude some
|
||||
* diacritics that are above or below the main body of the word. In most cases
|
||||
* where the placement is obvious, and after recognition, it doesn't make as
|
||||
* much difference, as the diacritics will already be included in the word.
|
||||
*/
|
||||
void SetBoundingBoxComponents(bool include_upper_dots,
|
||||
bool include_lower_dots) {
|
||||
include_upper_dots_ = include_upper_dots;
|
||||
include_lower_dots_ = include_lower_dots;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the bounding rectangle of the current object at the given level.
|
||||
* See comment on coordinate system above.
|
||||
* Returns false if there is no such object at the current position.
|
||||
* The returned bounding box is guaranteed to match the size and position
|
||||
* of the image returned by GetBinaryImage, but may clip foreground pixels
|
||||
* from a grey image. The padding argument to GetImage can be used to expand
|
||||
* the image to include more foreground pixels. See GetImage below.
|
||||
*/
|
||||
bool BoundingBox(PageIteratorLevel level, int *left, int *top, int *right,
|
||||
int *bottom) const;
|
||||
bool BoundingBox(PageIteratorLevel level, int padding, int *left, int *top,
|
||||
int *right, int *bottom) const;
|
||||
/**
|
||||
* Returns the bounding rectangle of the object in a coordinate system of the
|
||||
* working image rectangle having its origin at (rect_left_, rect_top_) with
|
||||
* respect to the original image and is scaled by a factor scale_.
|
||||
*/
|
||||
bool BoundingBoxInternal(PageIteratorLevel level, int *left, int *top,
|
||||
int *right, int *bottom) const;
|
||||
|
||||
/** Returns whether there is no object of a given level. */
|
||||
bool Empty(PageIteratorLevel level) const;
|
||||
|
||||
/**
|
||||
* Returns the type of the current block.
|
||||
* See tesseract/publictypes.h for PolyBlockType.
|
||||
*/
|
||||
PolyBlockType BlockType() const;
|
||||
|
||||
/**
|
||||
* Returns the polygon outline of the current block. The returned Pta must
|
||||
* be ptaDestroy-ed after use. Note that the returned Pta lists the vertices
|
||||
* of the polygon, and the last edge is the line segment between the last
|
||||
* point and the first point. nullptr will be returned if the iterator is
|
||||
* at the end of the document or layout analysis was not used.
|
||||
*/
|
||||
Pta *BlockPolygon() const;
|
||||
|
||||
/**
|
||||
* Returns a binary image of the current object at the given level.
|
||||
* The position and size match the return from BoundingBoxInternal, and so
|
||||
* this could be upscaled with respect to the original input image.
|
||||
* Use pixDestroy to delete the image after use.
|
||||
*/
|
||||
Pix *GetBinaryImage(PageIteratorLevel level) const;
|
||||
|
||||
/**
|
||||
* Returns an image of the current object at the given level in greyscale
|
||||
* if available in the input. To guarantee a binary image use BinaryImage.
|
||||
* NOTE that in order to give the best possible image, the bounds are
|
||||
* expanded slightly over the binary connected component, by the supplied
|
||||
* padding, so the top-left position of the returned image is returned
|
||||
* in (left,top). These will most likely not match the coordinates
|
||||
* returned by BoundingBox.
|
||||
* If you do not supply an original image, you will get a binary one.
|
||||
* Use pixDestroy to delete the image after use.
|
||||
*/
|
||||
Pix *GetImage(PageIteratorLevel level, int padding, Pix *original_img,
|
||||
int *left, int *top) const;
|
||||
|
||||
/**
|
||||
* Returns the baseline of the current object at the given level.
|
||||
* The baseline is the line that passes through (x1, y1) and (x2, y2).
|
||||
* WARNING: with vertical text, baselines may be vertical!
|
||||
* Returns false if there is no baseline at the current position.
|
||||
*/
|
||||
bool Baseline(PageIteratorLevel level, int *x1, int *y1, int *x2,
|
||||
int *y2) const;
|
||||
|
||||
// Returns the attributes of the current row.
|
||||
void RowAttributes(float *row_height, float *descenders,
|
||||
float *ascenders) const;
|
||||
|
||||
/**
|
||||
* Returns orientation for the block the iterator points to.
|
||||
* orientation, writing_direction, textline_order: see publictypes.h
|
||||
* deskew_angle: after rotating the block so the text orientation is
|
||||
* upright, how many radians does one have to rotate the
|
||||
* block anti-clockwise for it to be level?
|
||||
* -Pi/4 <= deskew_angle <= Pi/4
|
||||
*/
|
||||
void Orientation(tesseract::Orientation *orientation,
|
||||
tesseract::WritingDirection *writing_direction,
|
||||
tesseract::TextlineOrder *textline_order,
|
||||
float *deskew_angle) const;
|
||||
|
||||
/**
|
||||
* Returns information about the current paragraph, if available.
|
||||
*
|
||||
* justification -
|
||||
* LEFT if ragged right, or fully justified and script is left-to-right.
|
||||
* RIGHT if ragged left, or fully justified and script is right-to-left.
|
||||
* unknown if it looks like source code or we have very few lines.
|
||||
* is_list_item -
|
||||
* true if we believe this is a member of an ordered or unordered list.
|
||||
* is_crown -
|
||||
* true if the first line of the paragraph is aligned with the other
|
||||
* lines of the paragraph even though subsequent paragraphs have first
|
||||
* line indents. This typically indicates that this is the continuation
|
||||
* of a previous paragraph or that it is the very first paragraph in
|
||||
* the chapter.
|
||||
* first_line_indent -
|
||||
* For LEFT aligned paragraphs, the first text line of paragraphs of
|
||||
* this kind are indented this many pixels from the left edge of the
|
||||
* rest of the paragraph.
|
||||
* for RIGHT aligned paragraphs, the first text line of paragraphs of
|
||||
* this kind are indented this many pixels from the right edge of the
|
||||
* rest of the paragraph.
|
||||
* NOTE 1: This value may be negative.
|
||||
* NOTE 2: if *is_crown == true, the first line of this paragraph is
|
||||
* actually flush, and first_line_indent is set to the "common"
|
||||
* first_line_indent for subsequent paragraphs in this block
|
||||
* of text.
|
||||
*/
|
||||
void ParagraphInfo(tesseract::ParagraphJustification *justification,
|
||||
bool *is_list_item, bool *is_crown,
|
||||
int *first_line_indent) const;
|
||||
|
||||
// If the current WERD_RES (it_->word()) is not nullptr, sets the BlamerBundle
|
||||
// of the current word to the given pointer (takes ownership of the pointer)
|
||||
// and returns true.
|
||||
// Can only be used when iterating on the word level.
|
||||
bool SetWordBlamerBundle(BlamerBundle *blamer_bundle);
|
||||
|
||||
protected:
|
||||
/**
|
||||
* Sets up the internal data for iterating the blobs of a new word, then
|
||||
* moves the iterator to the given offset.
|
||||
*/
|
||||
void BeginWord(int offset);
|
||||
|
||||
/** Pointer to the page_res owned by the API. */
|
||||
PAGE_RES *page_res_;
|
||||
/** Pointer to the Tesseract object owned by the API. */
|
||||
Tesseract *tesseract_;
|
||||
/**
|
||||
* The iterator to the page_res_. Owned by this ResultIterator.
|
||||
* A pointer just to avoid dragging in Tesseract includes.
|
||||
*/
|
||||
PAGE_RES_IT *it_;
|
||||
/**
|
||||
* The current input WERD being iterated. If there is an output from OCR,
|
||||
* then word_ is nullptr. Owned by the API
|
||||
*/
|
||||
WERD *word_;
|
||||
/** The length of the current word_. */
|
||||
int word_length_;
|
||||
/** The current blob index within the word. */
|
||||
int blob_index_;
|
||||
/**
|
||||
* Iterator to the blobs within the word. If nullptr, then we are iterating
|
||||
* OCR results in the box_word.
|
||||
* Owned by this ResultIterator.
|
||||
*/
|
||||
C_BLOB_IT *cblob_it_;
|
||||
/** Control over what to include in bounding boxes. */
|
||||
bool include_upper_dots_;
|
||||
bool include_lower_dots_;
|
||||
/** Parameters saved from the Thresholder. Needed to rebuild coordinates.*/
|
||||
int scale_;
|
||||
int scaled_yres_;
|
||||
int rect_left_;
|
||||
int rect_top_;
|
||||
int rect_width_;
|
||||
int rect_height_;
|
||||
};
|
||||
|
||||
} // namespace tesseract.
|
||||
|
||||
#endif // TESSERACT_CCMAIN_PAGEITERATOR_H_
|
|
@ -1,281 +0,0 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: publictypes.h
|
||||
// Description: Types used in both the API and internally
|
||||
// Author: Ray Smith
|
||||
//
|
||||
// (C) Copyright 2010, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef TESSERACT_CCSTRUCT_PUBLICTYPES_H_
|
||||
#define TESSERACT_CCSTRUCT_PUBLICTYPES_H_
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// This file contains types that are used both by the API and internally
|
||||
// to Tesseract. In order to decouple the API from Tesseract and prevent cyclic
|
||||
// dependencies, THIS FILE SHOULD NOT DEPEND ON ANY OTHER PART OF TESSERACT.
|
||||
// Restated: It is OK for low-level Tesseract files to include publictypes.h,
|
||||
// but not for the low-level tesseract code to include top-level API code.
|
||||
// This file should not use other Tesseract types, as that would drag
|
||||
// their includes into the API-level.
|
||||
|
||||
/** Number of printers' points in an inch. The unit of the pointsize return. */
constexpr int kPointsPerInch = 72;
/**
 * Minimum believable resolution. Used as a default if there is no other
 * information, as it is safer to under-estimate than over-estimate.
 */
constexpr int kMinCredibleResolution = 70;
/** Maximum believable resolution. */
constexpr int kMaxCredibleResolution = 2400;
/**
 * Ratio between median blob size and likely resolution. Used to estimate
 * resolution when none is provided. This is basically 1/usual text size in
 * inches.
 */
constexpr int kResolutionEstimationFactor = 10;
|
||||
|
||||
/**
 * Possible types for a POLY_BLOCK or ColPartition.
 * Must be kept in sync with kPBColors in polyblk.cpp and PTIs*Type functions
 * below, as well as kPolyBlockNames in layout_test.cc.
 * Used extensively by ColPartition, and POLY_BLOCK.
 */
enum PolyBlockType {
  PT_UNKNOWN,         // Type is not yet known. Keep as the first element.
  PT_FLOWING_TEXT,    // Text that lives inside a column.
  PT_HEADING_TEXT,    // Text that spans more than one column.
  PT_PULLOUT_TEXT,    // Text that is in a cross-column pull-out region.
  PT_EQUATION,        // Partition belonging to an equation region.
  PT_INLINE_EQUATION, // Partition has inline equation.
  PT_TABLE,           // Partition belonging to a table region.
  PT_VERTICAL_TEXT,   // Text-line runs vertically.
  PT_CAPTION_TEXT,    // Text that belongs to an image.
  PT_FLOWING_IMAGE,   // Image that lives inside a column.
  PT_HEADING_IMAGE,   // Image that spans more than one column.
  PT_PULLOUT_IMAGE,   // Image that is in a cross-column pull-out region.
  PT_HORZ_LINE,       // Horizontal Line.
  PT_VERT_LINE,       // Vertical Line.
  PT_NOISE,           // Lies outside of any column.
  PT_COUNT
};

/** Returns true if PolyBlockType is a horizontal or vertical line type. */
inline bool PTIsLineType(PolyBlockType type) {
  return type == PT_HORZ_LINE || type == PT_VERT_LINE;
}
/** Returns true if PolyBlockType is of image type. */
inline bool PTIsImageType(PolyBlockType type) {
  return type == PT_FLOWING_IMAGE || type == PT_HEADING_IMAGE ||
         type == PT_PULLOUT_IMAGE;
}
/**
 * Returns true if PolyBlockType is of text type.
 * PT_EQUATION is not counted as text; PT_INLINE_EQUATION and PT_TABLE are.
 */
inline bool PTIsTextType(PolyBlockType type) {
  return type == PT_FLOWING_TEXT || type == PT_HEADING_TEXT ||
         type == PT_PULLOUT_TEXT || type == PT_TABLE ||
         type == PT_VERTICAL_TEXT || type == PT_CAPTION_TEXT ||
         type == PT_INLINE_EQUATION;
}
/** Returns true if PolyBlockType is of pullout (inter-column) type. */
inline bool PTIsPulloutType(PolyBlockType type) {
  return type == PT_PULLOUT_IMAGE || type == PT_PULLOUT_TEXT;
}
|
||||
|
||||
/**
 * +------------------+  Orientation Example:
 * | 1 Aaaa Aaaa Aaaa |  ====================
 * | Aaa aa aaa aa    |  To left is a diagram of some (1) English and
 * | aaaaaa A aa aaa. |  (2) Chinese text and a (3) photo credit.
 * |                2 |
 * |   #######  c c C |  Upright Latin characters are represented as A and a.
 * |   #######  c c c |  '<' represents a latin character rotated
 * | < #######  c c c |      anti-clockwise 90 degrees.
 * | < #######  c   c |
 * | < #######  .   c |  Upright Chinese characters are represented C and c.
 * | 3 #######      c |
 * +------------------+  NOTA BENE: enum values here should match goodoc.proto
 *
 * If you orient your head so that "up" aligns with Orientation,
 * then the characters will appear "right side up" and readable.
 *
 * In the example above, both the English and Chinese paragraphs are oriented
 * so their "up" is the top of the page (page up). The photo credit is read
 * with one's head turned leftward ("up" is to page left).
 *
 * The values of this enum match the convention of Tesseract's osdetect.h
 */
enum Orientation {
  ORIENTATION_PAGE_UP = 0,
  ORIENTATION_PAGE_RIGHT = 1,
  ORIENTATION_PAGE_DOWN = 2,
  ORIENTATION_PAGE_LEFT = 3,
};
|
||||
|
||||
/**
 * The grapheme clusters within a line of text are laid out logically
 * in this direction, judged when looking at the text line rotated so that
 * its Orientation is "page up".
 *
 * For English text, the writing direction is left-to-right. For the
 * Chinese text in the above example, the writing direction is top-to-bottom.
 */
enum WritingDirection {
  WRITING_DIRECTION_LEFT_TO_RIGHT = 0,
  WRITING_DIRECTION_RIGHT_TO_LEFT = 1,
  WRITING_DIRECTION_TOP_TO_BOTTOM = 2,
};
|
||||
|
||||
/**
 * The text lines are read in the given sequence.
 *
 * In English, the order is top-to-bottom.
 * In Chinese, vertical text lines are read right-to-left. Mongolian is
 * written in vertical columns top to bottom like Chinese, but the lines
 * are ordered left-to-right.
 *
 * Note that only some combinations make sense. For example,
 * WRITING_DIRECTION_LEFT_TO_RIGHT implies TEXTLINE_ORDER_TOP_TO_BOTTOM.
 */
enum TextlineOrder {
  TEXTLINE_ORDER_LEFT_TO_RIGHT = 0,
  TEXTLINE_ORDER_RIGHT_TO_LEFT = 1,
  TEXTLINE_ORDER_TOP_TO_BOTTOM = 2,
};
|
||||
|
||||
/**
 * Possible modes for page layout analysis. These *must* be kept in order
 * of decreasing amount of layout analysis to be done, except for OSD_ONLY,
 * so that the inequality tests in the inline functions below work.
 */
enum PageSegMode {
  PSM_OSD_ONLY = 0,      ///< Orientation and script detection only.
  PSM_AUTO_OSD = 1,      ///< Automatic page segmentation with orientation and
                         ///< script detection. (OSD)
  PSM_AUTO_ONLY = 2,     ///< Automatic page segmentation, but no OSD, or OCR.
  PSM_AUTO = 3,          ///< Fully automatic page segmentation, but no OSD.
  PSM_SINGLE_COLUMN = 4, ///< Assume a single column of text of variable sizes.
  PSM_SINGLE_BLOCK_VERT_TEXT = 5, ///< Assume a single uniform block of
                                  ///< vertically aligned text.
  PSM_SINGLE_BLOCK = 6, ///< Assume a single uniform block of text. (Default.)
  PSM_SINGLE_LINE = 7,  ///< Treat the image as a single text line.
  PSM_SINGLE_WORD = 8,  ///< Treat the image as a single word.
  PSM_CIRCLE_WORD = 9,  ///< Treat the image as a single word in a circle.
  PSM_SINGLE_CHAR = 10, ///< Treat the image as a single character.
  PSM_SPARSE_TEXT =
      11, ///< Find as much text as possible in no particular order.
  PSM_SPARSE_TEXT_OSD = 12, ///< Sparse text with orientation and script det.
  PSM_RAW_LINE = 13, ///< Treat the image as a single text line, bypassing
                     ///< hacks that are Tesseract-specific.

  PSM_COUNT ///< Number of enum entries.
};

/**
 * Inline functions that act on a PageSegMode to determine whether components of
 * layout analysis are enabled.
 * *Depend critically on the order of elements of PageSegMode.*
 * NOTE that arg is an int for compatibility with INT_PARAM.
 */
inline bool PSM_OSD_ENABLED(int pageseg_mode) {
  return pageseg_mode <= PSM_AUTO_OSD || pageseg_mode == PSM_SPARSE_TEXT_OSD;
}
inline bool PSM_ORIENTATION_ENABLED(int pageseg_mode) {
  return pageseg_mode <= PSM_AUTO || pageseg_mode == PSM_SPARSE_TEXT_OSD;
}
inline bool PSM_COL_FIND_ENABLED(int pageseg_mode) {
  return pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_AUTO;
}
inline bool PSM_SPARSE(int pageseg_mode) {
  return pageseg_mode == PSM_SPARSE_TEXT || pageseg_mode == PSM_SPARSE_TEXT_OSD;
}
inline bool PSM_BLOCK_FIND_ENABLED(int pageseg_mode) {
  return pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_SINGLE_COLUMN;
}
inline bool PSM_LINE_FIND_ENABLED(int pageseg_mode) {
  return pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_SINGLE_BLOCK;
}
inline bool PSM_WORD_FIND_ENABLED(int pageseg_mode) {
  return (pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_SINGLE_LINE) ||
         pageseg_mode == PSM_SPARSE_TEXT || pageseg_mode == PSM_SPARSE_TEXT_OSD;
}
|
||||
|
||||
/**
 * enum of the elements of the page hierarchy, used in ResultIterator
 * to provide functions that operate on each level without having to
 * have 5x as many functions.
 */
enum PageIteratorLevel {
  RIL_BLOCK,    // Block of text/image/separator line.
  RIL_PARA,     // Paragraph within a block.
  RIL_TEXTLINE, // Line within a paragraph.
  RIL_WORD,     // Word within a textline.
  RIL_SYMBOL    // Symbol/character within a word.
};
|
||||
|
||||
/**
 * JUSTIFICATION_UNKNOWN
 *   The alignment is not clearly one of the other options. This could happen
 *   for example if there are only one or two lines of text or the text looks
 *   like source code or poetry.
 *
 * NOTA BENE: Fully justified paragraphs (text aligned to both left and right
 *   margins) are marked by Tesseract with JUSTIFICATION_LEFT if their text
 *   is written with a left-to-right script and with JUSTIFICATION_RIGHT if
 *   their text is written in a right-to-left script.
 *
 * Interpretation for text read in vertical lines:
 *   "Left" is wherever the starting reading position is.
 *
 * JUSTIFICATION_LEFT
 *   Each line, except possibly the first, is flush to the same left tab stop.
 *
 * JUSTIFICATION_CENTER
 *   The text lines of the paragraph are centered about a line going
 *   down through their middle of the text lines.
 *
 * JUSTIFICATION_RIGHT
 *   Each line, except possibly the first, is flush to the same right tab stop.
 */
enum ParagraphJustification {
  JUSTIFICATION_UNKNOWN,
  JUSTIFICATION_LEFT,
  JUSTIFICATION_CENTER,
  JUSTIFICATION_RIGHT,
};
|
||||
|
||||
/**
 * When Tesseract/Cube is initialized we can choose to instantiate/load/run
 * only the Tesseract part, only the Cube part or both along with the combiner.
 * The preference of which engine to use is stored in tessedit_ocr_engine_mode.
 *
 * ATTENTION: When modifying this enum, please make sure to make the
 * appropriate changes to all the enums mirroring it (e.g. OCREngine in
 * cityblock/workflow/detection/detection_storage.proto). Such enums will
 * mention the connection to OcrEngineMode in the comments.
 */
enum OcrEngineMode {
  OEM_TESSERACT_ONLY,          // Run Tesseract only - fastest; deprecated
  OEM_LSTM_ONLY,               // Run just the LSTM line recognizer.
  OEM_TESSERACT_LSTM_COMBINED, // Run the LSTM recognizer, but allow fallback
                               // to Tesseract when things get difficult.
                               // deprecated
  OEM_DEFAULT,                 // Specify this mode when calling init_*(),
                               // to indicate that any of the above modes
                               // should be automatically inferred from the
                               // variables in the language-specific config,
                               // command-line configs, or if not specified
                               // in any of the above should be set to the
                               // default OEM_TESSERACT_ONLY.
  OEM_COUNT                    // Number of OEMs
};
|
||||
|
||||
} // namespace tesseract.
|
||||
|
||||
#endif // TESSERACT_CCSTRUCT_PUBLICTYPES_H_
|
|
@ -1,311 +0,0 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: renderer.h
|
||||
// Description: Rendering interface to inject into TessBaseAPI
|
||||
//
|
||||
// (C) Copyright 2011, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef TESSERACT_API_RENDERER_H_
|
||||
#define TESSERACT_API_RENDERER_H_
|
||||
|
||||
#include "export.h"
|
||||
|
||||
// To avoid collision with other typenames include the ABSOLUTE MINIMUM
|
||||
// complexity of includes here. Use forward declarations wherever possible
|
||||
// and hide includes of complex types in baseapi.cpp.
|
||||
#include <cstdint>
|
||||
#include <string> // for std::string
|
||||
#include <vector> // for std::vector
|
||||
|
||||
struct Pix;
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
class TessBaseAPI;
|
||||
|
||||
/**
|
||||
* Interface for rendering tesseract results into a document, such as text,
|
||||
* HOCR or pdf. This class is abstract. Specific classes handle individual
|
||||
* formats. This interface is then used to inject the renderer class into
|
||||
* tesseract when processing images.
|
||||
*
|
||||
* For simplicity implementing this with tesseract version 3.01,
|
||||
* the renderer contains document state that is cleared from document
|
||||
* to document just as the TessBaseAPI is. This way the base API can just
|
||||
* delegate its rendering functionality to injected renderers, and the
|
||||
* renderers can manage the associated state needed for the specific formats
|
||||
* in addition to the heuristics for producing it.
|
||||
*/
|
||||
class TESS_API TessResultRenderer {
|
||||
public:
|
||||
virtual ~TessResultRenderer();
|
||||
|
||||
// Takes ownership of pointer so must be new'd instance.
|
||||
// Renderers aren't ordered, but appends the sequences of next parameter
|
||||
// and existing next(). The renderers should be unique across both lists.
|
||||
void insert(TessResultRenderer *next);
|
||||
|
||||
// Returns the next renderer or nullptr.
|
||||
TessResultRenderer *next() {
|
||||
return next_;
|
||||
}
|
||||
|
||||
/**
|
||||
* Starts a new document with the given title.
|
||||
* This clears the contents of the output data.
|
||||
* Title should use UTF-8 encoding.
|
||||
*/
|
||||
bool BeginDocument(const char *title);
|
||||
|
||||
/**
|
||||
* Adds the recognized text from the source image to the current document.
|
||||
* Invalid if BeginDocument not yet called.
|
||||
*
|
||||
* Note that this API is a bit weird but is designed to fit into the
|
||||
* current TessBaseAPI implementation where the api has lots of state
|
||||
* information that we might want to add in.
|
||||
*/
|
||||
bool AddImage(TessBaseAPI *api);
|
||||
|
||||
/**
|
||||
* Finishes the document and finalizes the output data
|
||||
* Invalid if BeginDocument not yet called.
|
||||
*/
|
||||
bool EndDocument();
|
||||
|
||||
const char *file_extension() const {
|
||||
return file_extension_;
|
||||
}
|
||||
const char *title() const {
|
||||
return title_.c_str();
|
||||
}
|
||||
|
||||
// Is everything fine? Otherwise something went wrong.
|
||||
bool happy() const {
|
||||
return happy_;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the index of the last image given to AddImage
|
||||
* (i.e. images are incremented whether the image succeeded or not)
|
||||
*
|
||||
* This is always defined. It means either the number of the
|
||||
* current image, the last image ended, or in the completed document
|
||||
* depending on when in the document lifecycle you are looking at it.
|
||||
* Will return -1 if a document was never started.
|
||||
*/
|
||||
int imagenum() const {
|
||||
return imagenum_;
|
||||
}
|
||||
|
||||
protected:
|
||||
/**
|
||||
* Called by concrete classes.
|
||||
*
|
||||
* outputbase is the name of the output file excluding
|
||||
* extension. For example, "/path/to/chocolate-chip-cookie-recipe"
|
||||
*
|
||||
* extension indicates the file extension to be used for output
|
||||
* files. For example "pdf" will produce a .pdf file, and "hocr"
|
||||
* will produce .hocr files.
|
||||
*/
|
||||
TessResultRenderer(const char *outputbase, const char *extension);
|
||||
|
||||
// Hook for specialized handling in BeginDocument()
|
||||
virtual bool BeginDocumentHandler();
|
||||
|
||||
// This must be overridden to render the OCR'd results
|
||||
virtual bool AddImageHandler(TessBaseAPI *api) = 0;
|
||||
|
||||
// Hook for specialized handling in EndDocument()
|
||||
virtual bool EndDocumentHandler();
|
||||
|
||||
// Renderers can call this to append '\0' terminated strings into
|
||||
// the output string returned by GetOutput.
|
||||
// This method will grow the output buffer if needed.
|
||||
void AppendString(const char *s);
|
||||
|
||||
// Renderers can call this to append binary byte sequences into
|
||||
// the output string returned by GetOutput. Note that s is not necessarily
|
||||
// '\0' terminated (and can contain '\0' within it).
|
||||
// This method will grow the output buffer if needed.
|
||||
void AppendData(const char *s, int len);
|
||||
|
||||
private:
|
||||
TessResultRenderer *next_; // Can link multiple renderers together
|
||||
FILE *fout_; // output file pointer
|
||||
const char *file_extension_; // standard extension for generated output
|
||||
std::string title_; // title of document being rendered
|
||||
int imagenum_; // index of last image added
|
||||
bool happy_; // I get grumpy when the disk fills up, etc.
|
||||
};
|
||||
|
||||
/**
|
||||
* Renders tesseract output into a plain UTF-8 text string
|
||||
*/
|
||||
class TESS_API TessTextRenderer : public TessResultRenderer {
|
||||
public:
|
||||
explicit TessTextRenderer(const char *outputbase);
|
||||
|
||||
protected:
|
||||
bool AddImageHandler(TessBaseAPI *api) override;
|
||||
};
|
||||
|
||||
/**
|
||||
* Renders tesseract output into an hocr text string
|
||||
*/
|
||||
class TESS_API TessHOcrRenderer : public TessResultRenderer {
|
||||
public:
|
||||
explicit TessHOcrRenderer(const char *outputbase, bool font_info);
|
||||
explicit TessHOcrRenderer(const char *outputbase);
|
||||
|
||||
protected:
|
||||
bool BeginDocumentHandler() override;
|
||||
bool AddImageHandler(TessBaseAPI *api) override;
|
||||
bool EndDocumentHandler() override;
|
||||
|
||||
private:
|
||||
bool font_info_; // whether to print font information
|
||||
};
|
||||
|
||||
/**
|
||||
* Renders tesseract output into an alto text string
|
||||
*/
|
||||
class TESS_API TessAltoRenderer : public TessResultRenderer {
|
||||
public:
|
||||
explicit TessAltoRenderer(const char *outputbase);
|
||||
|
||||
protected:
|
||||
bool BeginDocumentHandler() override;
|
||||
bool AddImageHandler(TessBaseAPI *api) override;
|
||||
bool EndDocumentHandler() override;
|
||||
|
||||
private:
|
||||
bool begin_document;
|
||||
};
|
||||
|
||||
/**
|
||||
* Renders Tesseract output into a TSV string
|
||||
*/
|
||||
class TESS_API TessTsvRenderer : public TessResultRenderer {
|
||||
public:
|
||||
explicit TessTsvRenderer(const char *outputbase, bool font_info);
|
||||
explicit TessTsvRenderer(const char *outputbase);
|
||||
|
||||
protected:
|
||||
bool BeginDocumentHandler() override;
|
||||
bool AddImageHandler(TessBaseAPI *api) override;
|
||||
bool EndDocumentHandler() override;
|
||||
|
||||
private:
|
||||
bool font_info_; // whether to print font information
|
||||
};
|
||||
|
||||
/**
|
||||
* Renders tesseract output into searchable PDF
|
||||
*/
|
||||
class TESS_API TessPDFRenderer : public TessResultRenderer {
|
||||
public:
|
||||
// datadir is the location of the TESSDATA. We need it because
|
||||
// we load a custom PDF font from this location.
|
||||
TessPDFRenderer(const char *outputbase, const char *datadir,
|
||||
bool textonly = false);
|
||||
|
||||
protected:
|
||||
bool BeginDocumentHandler() override;
|
||||
bool AddImageHandler(TessBaseAPI *api) override;
|
||||
bool EndDocumentHandler() override;
|
||||
|
||||
private:
|
||||
// We don't want to have every image in memory at once,
|
||||
// so we store some metadata as we go along producing
|
||||
// PDFs one page at a time. At the end, that metadata is
|
||||
// used to make everything that isn't easily handled in a
|
||||
// streaming fashion.
|
||||
long int obj_; // counter for PDF objects
|
||||
std::vector<uint64_t> offsets_; // offset of every PDF object in bytes
|
||||
std::vector<long int> pages_; // object number for every /Page object
|
||||
std::string datadir_; // where to find the custom font
|
||||
bool textonly_; // skip images if set
|
||||
// Bookkeeping only. DIY = Do It Yourself.
|
||||
void AppendPDFObjectDIY(size_t objectsize);
|
||||
// Bookkeeping + emit data.
|
||||
void AppendPDFObject(const char *data);
|
||||
// Create the /Contents object for an entire page.
|
||||
char *GetPDFTextObjects(TessBaseAPI *api, double width, double height);
|
||||
// Turn an image into a PDF object. Only transcode if we have to.
|
||||
static bool imageToPDFObj(Pix *pix, const char *filename, long int objnum,
|
||||
char **pdf_object, long int *pdf_object_size,
|
||||
int jpg_quality);
|
||||
};
|
||||
|
||||
/**
|
||||
* Renders tesseract output into a plain UTF-8 text string
|
||||
*/
|
||||
class TESS_API TessUnlvRenderer : public TessResultRenderer {
|
||||
public:
|
||||
explicit TessUnlvRenderer(const char *outputbase);
|
||||
|
||||
protected:
|
||||
bool AddImageHandler(TessBaseAPI *api) override;
|
||||
};
|
||||
|
||||
/**
|
||||
* Renders tesseract output into a plain UTF-8 text string for LSTMBox
|
||||
*/
|
||||
class TESS_API TessLSTMBoxRenderer : public TessResultRenderer {
|
||||
public:
|
||||
explicit TessLSTMBoxRenderer(const char *outputbase);
|
||||
|
||||
protected:
|
||||
bool AddImageHandler(TessBaseAPI *api) override;
|
||||
};
|
||||
|
||||
/**
|
||||
* Renders tesseract output into a plain UTF-8 text string
|
||||
*/
|
||||
class TESS_API TessBoxTextRenderer : public TessResultRenderer {
|
||||
public:
|
||||
explicit TessBoxTextRenderer(const char *outputbase);
|
||||
|
||||
protected:
|
||||
bool AddImageHandler(TessBaseAPI *api) override;
|
||||
};
|
||||
|
||||
/**
|
||||
* Renders tesseract output into a plain UTF-8 text string in WordStr format
|
||||
*/
|
||||
class TESS_API TessWordStrBoxRenderer : public TessResultRenderer {
|
||||
public:
|
||||
explicit TessWordStrBoxRenderer(const char *outputbase);
|
||||
|
||||
protected:
|
||||
bool AddImageHandler(TessBaseAPI *api) override;
|
||||
};
|
||||
|
||||
#ifndef DISABLED_LEGACY_ENGINE
|
||||
|
||||
/**
|
||||
* Renders tesseract output into an osd text string
|
||||
*/
|
||||
class TESS_API TessOsdRenderer : public TessResultRenderer {
|
||||
public:
|
||||
explicit TessOsdRenderer(const char *outputbase);
|
||||
|
||||
protected:
|
||||
bool AddImageHandler(TessBaseAPI *api) override;
|
||||
};
|
||||
|
||||
#endif // ndef DISABLED_LEGACY_ENGINE
|
||||
|
||||
} // namespace tesseract.
|
||||
|
||||
#endif // TESSERACT_API_RENDERER_H_
|
|
@ -1,250 +0,0 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: resultiterator.h
|
||||
// Description: Iterator for tesseract results that is capable of
|
||||
// iterating in proper reading order over Bi Directional
|
||||
// (e.g. mixed Hebrew and English) text.
|
||||
// Author: David Eger
|
||||
//
|
||||
// (C) Copyright 2011, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef TESSERACT_CCMAIN_RESULT_ITERATOR_H_
|
||||
#define TESSERACT_CCMAIN_RESULT_ITERATOR_H_
|
||||
|
||||
#include "export.h" // for TESS_API, TESS_LOCAL
|
||||
#include "ltrresultiterator.h" // for LTRResultIterator
|
||||
#include "publictypes.h" // for PageIteratorLevel
|
||||
#include "unichar.h" // for StrongScriptDirection
|
||||
|
||||
#include <set> // for std::pair
|
||||
#include <vector> // for std::vector
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Iterator over tesseract results that visits elements in proper reading
// order for bidirectional text, built on top of LTRResultIterator.
class TESS_API ResultIterator : public LTRResultIterator {
public:
  // Factory: builds a ResultIterator from resit. The protected constructor
  // below documents that the new iterator is reset to the beginning of the
  // paragraph rather than staying where resit pointed.
  static ResultIterator *StartOfParagraph(const LTRResultIterator &resit);

  /**
   * ResultIterator is copy constructible!
   * The default copy constructor works just fine for us, so none is
   * declared; only the destructor is spelled out here.
   */
  ~ResultIterator() override = default;

  // ============= Moving around within the page ============.
  /**
   * Moves the iterator to point to the start of the page to begin
   * an iteration.
   */
  void Begin() override;

  /**
   * Moves to the start of the next object at the given level in the
   * page hierarchy in the appropriate reading order and returns false if
   * the end of the page was reached.
   * NOTE that RIL_SYMBOL will skip non-text blocks, but all other
   * PageIteratorLevel level values will visit each non-text block once.
   * Think of non text blocks as containing a single para, with a single line,
   * with a single imaginary word.
   * Calls to Next with different levels may be freely intermixed.
   * This function iterates words in right-to-left scripts correctly, if
   * the appropriate language has been loaded into Tesseract.
   */
  bool Next(PageIteratorLevel level) override;

  /**
   * IsAtBeginningOf() returns whether we're at the logical beginning of the
   * given level. (as opposed to ResultIterator's left-to-right top-to-bottom
   * order).  Otherwise, this acts the same as PageIterator::IsAtBeginningOf().
   * For a full description, see pageiterator.h
   */
  bool IsAtBeginningOf(PageIteratorLevel level) const override;

  /**
   * Implement PageIterator's IsAtFinalElement correctly in a BiDi context.
   * For instance, IsAtFinalElement(RIL_PARA, RIL_WORD) returns whether we
   * point at the last word in a paragraph.  See PageIterator for full comment.
   */
  bool IsAtFinalElement(PageIteratorLevel level,
                        PageIteratorLevel element) const override;

  // ============= Functions that refer to words only ============.
  // Returns the number of blanks before the current word.
  int BlanksBeforeWord() const;

  // ============= Accessing data ==============.

  /**
   * Returns the null terminated UTF-8 encoded text string for the current
   * object at the given level. Use delete [] to free after use.
   */
  virtual char *GetUTF8Text(PageIteratorLevel level) const;

  /**
   * Returns the LSTM choices for every LSTM timestep for the current word.
   */
  virtual std::vector<std::vector<std::vector<std::pair<const char *, float>>>>
      *GetRawLSTMTimesteps() const;
  virtual std::vector<std::vector<std::pair<const char *, float>>>
      *GetBestLSTMSymbolChoices() const;

  /**
   * Return whether the current paragraph's dominant reading direction
   * is left-to-right (as opposed to right-to-left).
   */
  bool ParagraphIsLtr() const;

  // ============= Exposed only for testing =============.

  /**
   * Yields the reading order as a sequence of indices and (optional)
   * meta-marks for a set of words (given left-to-right).
   * The meta marks are passed as negative values:
   *   kMinorRunStart  Start of minor direction text.
   *   kMinorRunEnd    End of minor direction text.
   *   kComplexWord    The next indexed word contains both left-to-right and
   *                   right-to-left characters and was treated as neutral.
   *
   * For example, suppose we have five words in a text line,
   * indexed [0,1,2,3,4] from the leftmost side of the text line.
   * The following are all believable reading_orders:
   *
   * Left-to-Right (in ltr paragraph):
   *     { 0, 1, 2, 3, 4 }
   * Left-to-Right (in rtl paragraph):
   *     { kMinorRunStart, 0, 1, 2, 3, 4, kMinorRunEnd }
   * Right-to-Left (in rtl paragraph):
   *     { 4, 3, 2, 1, 0 }
   * Left-to-Right except for an RTL phrase in words 2, 3 in an ltr paragraph:
   *     { 0, 1, kMinorRunStart, 3, 2, kMinorRunEnd, 4 }
   */
  static void CalculateTextlineOrder(
      bool paragraph_is_ltr,
      const std::vector<StrongScriptDirection> &word_dirs,
      std::vector<int> *reading_order);

  static const int kMinorRunStart;
  static const int kMinorRunEnd;
  static const int kComplexWord;

protected:
  /**
   * We presume the data associated with the given iterator will outlive us.
   * NB: This is protected (not public) because it does something that is
   * non-obvious: it resets to the beginning of the paragraph instead of
   * staying wherever resit might have pointed.
   */
  explicit ResultIterator(const LTRResultIterator &resit);

private:
  /**
   * Calculates the current paragraph's dominant writing direction.
   * Typically, members should use current_paragraph_is_ltr_ instead.
   */
  bool CurrentParagraphIsLtr() const;

  /**
   * Returns word indices as measured from resit->RestartRow() = index 0
   * for the reading order of words within a textline given an iterator
   * into the middle of the text line.
   * In addition to non-negative word indices, the following negative values
   * may be inserted:
   *   kMinorRunStart  Start of minor direction text.
   *   kMinorRunEnd    End of minor direction text.
   *   kComplexWord    The previous word contains both left-to-right and
   *                   right-to-left characters and was treated as neutral.
   */
  void CalculateTextlineOrder(bool paragraph_is_ltr,
                              const LTRResultIterator &resit,
                              std::vector<int> *indices) const;
  /** Same as above, but the caller's ssd gets filled in if ssd != nullptr. */
  void CalculateTextlineOrder(bool paragraph_is_ltr,
                              const LTRResultIterator &resit,
                              std::vector<StrongScriptDirection> *ssd,
                              std::vector<int> *indices) const;

  /**
   * What is the index of the current word in a strict left-to-right reading
   * of the row?
   */
  int LTRWordIndex() const;

  /**
   * Given an iterator pointing at a word, returns the logical reading order
   * of blob indices for the word.
   */
  void CalculateBlobOrder(std::vector<int> *blob_indices) const;

  /** Precondition: current_paragraph_is_ltr_ is set. */
  void MoveToLogicalStartOfTextline();

  /**
   * Precondition: current_paragraph_is_ltr_ and in_minor_direction_
   * are set.
   */
  void MoveToLogicalStartOfWord();

  /** Are we pointing at the final (reading order) symbol of the word? */
  bool IsAtFinalSymbolOfWord() const;

  /** Are we pointing at the first (reading order) symbol of the word? */
  bool IsAtFirstSymbolOfWord() const;

  /**
   * Append any extra marks that should be appended to this word when printed.
   * Mostly, these are Unicode BiDi control characters.
   */
  void AppendSuffixMarks(std::string *text) const;

  /** Appends the current word in reading order to the given buffer.*/
  void AppendUTF8WordText(std::string *text) const;

  /**
   * Appends the text of the current text line, *assuming this iterator is
   * positioned at the beginning of the text line*  This function
   * updates the iterator to point to the first position past the text line.
   * Each textline is terminated in a single newline character.
   * If the textline ends a paragraph, it gets a second terminal newline.
   */
  void IterateAndAppendUTF8TextlineText(std::string *text);

  /**
   * Appends the text of the current paragraph in reading order
   * to the given buffer.
   * Each textline is terminated in a single newline character, and the
   * paragraph gets an extra newline at the end.
   */
  void AppendUTF8ParagraphText(std::string *text) const;

  /** Returns whether the bidi_debug flag is set to at least min_level. */
  bool BidiDebug(int min_level) const;

  /** Cached dominant direction of the current paragraph (true = LTR). */
  bool current_paragraph_is_ltr_;

  /**
   * Is the currently pointed-at character at the beginning of
   * a minor-direction run?
   */
  bool at_beginning_of_minor_run_;

  /** Is the currently pointed-at character in a minor-direction sequence? */
  bool in_minor_direction_;

  /**
   * Should detected inter-word spaces be preserved, or "compressed" to a single
   * space character (default behavior).
   */
  bool preserve_interword_spaces_;
};
|
||||
|
||||
} // namespace tesseract.
|
||||
|
||||
#endif // TESSERACT_CCMAIN_RESULT_ITERATOR_H_
|
|
@ -1,174 +0,0 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: unichar.h
|
||||
// Description: Unicode character/ligature class.
|
||||
// Author: Ray Smith
|
||||
//
|
||||
// (C) Copyright 2006, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef TESSERACT_CCUTIL_UNICHAR_H_
|
||||
#define TESSERACT_CCUTIL_UNICHAR_H_
|
||||
|
||||
#include "export.h"
|
||||
|
||||
#include <memory.h>
|
||||
#include <cstring>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Maximum number of characters that can be stored in a UNICHAR. Must be
// at least 4. Must not exceed 31 without changing the coding of length.
#define UNICHAR_LEN 30

// A UNICHAR_ID is the unique id of a unichar.
using UNICHAR_ID = int;

// A variable to indicate an invalid or uninitialized unichar id.
static const int INVALID_UNICHAR_ID = -1;
// A special unichar that corresponds to INVALID_UNICHAR_ID.
static const char INVALID_UNICHAR[] = "__INVALID_UNICHAR__";
|
||||
|
||||
// Script direction of a piece of text, classified by which directional
// characters it contains.
enum StrongScriptDirection {
  DIR_NEUTRAL = 0,       // Text contains only neutral characters.
  DIR_LEFT_TO_RIGHT = 1, // Text contains no Right-to-Left characters.
  DIR_RIGHT_TO_LEFT = 2, // Text contains no Left-to-Right characters.
  DIR_MIX = 3,           // Text contains a mixture of left-to-right
                         // and right-to-left characters.
};
|
||||
|
||||
using char32 = signed int;
|
||||
|
||||
// The UNICHAR class holds a single classification result. This may be
|
||||
// a single Unicode character (stored as between 1 and 4 utf8 bytes) or
|
||||
// multiple Unicode characters representing the NFKC expansion of a ligature
|
||||
// such as fi, ffl etc. These are also stored as utf8.
|
||||
class TESS_API UNICHAR {
|
||||
public:
|
||||
UNICHAR() {
|
||||
memset(chars, 0, UNICHAR_LEN);
|
||||
}
|
||||
|
||||
// Construct from a utf8 string. If len<0 then the string is null terminated.
|
||||
// If the string is too long to fit in the UNICHAR then it takes only what
|
||||
// will fit.
|
||||
UNICHAR(const char *utf8_str, int len);
|
||||
|
||||
// Construct from a single UCS4 character.
|
||||
explicit UNICHAR(int unicode);
|
||||
|
||||
// Default copy constructor and operator= are OK.
|
||||
|
||||
// Get the first character as UCS-4.
|
||||
int first_uni() const;
|
||||
|
||||
// Get the length of the UTF8 string.
|
||||
int utf8_len() const {
|
||||
int len = chars[UNICHAR_LEN - 1];
|
||||
return len >= 0 && len < UNICHAR_LEN ? len : UNICHAR_LEN;
|
||||
}
|
||||
|
||||
// Get a UTF8 string, but NOT nullptr terminated.
|
||||
const char *utf8() const {
|
||||
return chars;
|
||||
}
|
||||
|
||||
// Get a terminated UTF8 string: Must delete[] it after use.
|
||||
char *utf8_str() const;
|
||||
|
||||
// Get the number of bytes in the first character of the given utf8 string.
|
||||
static int utf8_step(const char *utf8_str);
|
||||
|
||||
// A class to simplify iterating over and accessing elements of a UTF8
|
||||
// string. Note that unlike the UNICHAR class, const_iterator does NOT COPY or
|
||||
// take ownership of the underlying byte array. It also does not permit
|
||||
// modification of the array (as the name suggests).
|
||||
//
|
||||
// Example:
|
||||
// for (UNICHAR::const_iterator it = UNICHAR::begin(str, str_len);
|
||||
// it != UNICHAR::end(str, len);
|
||||
// ++it) {
|
||||
// printf("UCS-4 symbol code = %d\n", *it);
|
||||
// char buf[5];
|
||||
// int char_len = it.get_utf8(buf); buf[char_len] = '\0';
|
||||
// printf("Char = %s\n", buf);
|
||||
// }
|
||||
class TESS_API const_iterator {
|
||||
using CI = const_iterator;
|
||||
|
||||
public:
|
||||
// Step to the next UTF8 character.
|
||||
// If the current position is at an illegal UTF8 character, then print an
|
||||
// error message and step by one byte. If the current position is at a
|
||||
// nullptr value, don't step past it.
|
||||
const_iterator &operator++();
|
||||
|
||||
// Return the UCS-4 value at the current position.
|
||||
// If the current position is at an illegal UTF8 value, return a single
|
||||
// space character.
|
||||
int operator*() const;
|
||||
|
||||
// Store the UTF-8 encoding of the current codepoint into buf, which must be
|
||||
// at least 4 bytes long. Return the number of bytes written.
|
||||
// If the current position is at an illegal UTF8 value, writes a single
|
||||
// space character and returns 1.
|
||||
// Note that this method does not null-terminate the buffer.
|
||||
int get_utf8(char *buf) const;
|
||||
// Returns the number of bytes of the current codepoint. Returns 1 if the
|
||||
// current position is at an illegal UTF8 value.
|
||||
int utf8_len() const;
|
||||
// Returns true if the UTF-8 encoding at the current position is legal.
|
||||
bool is_legal() const;
|
||||
|
||||
// Return the pointer into the string at the current position.
|
||||
const char *utf8_data() const {
|
||||
return it_;
|
||||
}
|
||||
|
||||
// Iterator equality operators.
|
||||
friend bool operator==(const CI &lhs, const CI &rhs) {
|
||||
return lhs.it_ == rhs.it_;
|
||||
}
|
||||
friend bool operator!=(const CI &lhs, const CI &rhs) {
|
||||
return !(lhs == rhs);
|
||||
}
|
||||
|
||||
private:
|
||||
friend class UNICHAR;
|
||||
explicit const_iterator(const char *it) : it_(it) {}
|
||||
|
||||
const char *it_; // Pointer into the string.
|
||||
};
|
||||
|
||||
// Create a start/end iterator pointing to a string. Note that these methods
|
||||
// are static and do NOT create a copy or take ownership of the underlying
|
||||
// array.
|
||||
static const_iterator begin(const char *utf8_str, int byte_length);
|
||||
static const_iterator end(const char *utf8_str, int byte_length);
|
||||
|
||||
// Converts a utf-8 string to a vector of unicodes.
|
||||
// Returns an empty vector if the input contains invalid UTF-8.
|
||||
static std::vector<char32> UTF8ToUTF32(const char *utf8_str);
|
||||
// Converts a vector of unicodes to a utf8 string.
|
||||
// Returns an empty string if the input contains an invalid unicode.
|
||||
static std::string UTF32ToUTF8(const std::vector<char32> &str32);
|
||||
|
||||
private:
|
||||
// A UTF-8 representation of 1 or more Unicode characters.
|
||||
// The last element (chars[UNICHAR_LEN - 1]) is a length if
|
||||
// its value < UNICHAR_LEN, otherwise it is a genuine character.
|
||||
char chars[UNICHAR_LEN]{};
|
||||
};
|
||||
|
||||
} // namespace tesseract
|
||||
|
||||
#endif // TESSERACT_CCUTIL_UNICHAR_H_
|
|
@ -1,34 +0,0 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
// File: version.h
|
||||
// Description: Version information
|
||||
//
|
||||
// (C) Copyright 2018, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef TESSERACT_API_VERSION_H_
|
||||
#define TESSERACT_API_VERSION_H_
|
||||
|
||||
// clang-format off
|
||||
|
||||
// Version components. The @...@ placeholders are substituted when this
// template is configured by the build system.
#define TESSERACT_MAJOR_VERSION @GENERIC_MAJOR_VERSION@
#define TESSERACT_MINOR_VERSION @GENERIC_MINOR_VERSION@
#define TESSERACT_MICRO_VERSION @GENERIC_MICRO_VERSION@

// Packed version number: major in bits 16 and up, minor in bits 8-15, micro
// in bits 0-7. Each component is parenthesized so the packing stays correct
// whatever expression is substituted for it.
#define TESSERACT_VERSION             \
  ((TESSERACT_MAJOR_VERSION) << 16 |  \
   (TESSERACT_MINOR_VERSION) << 8 |   \
   (TESSERACT_MICRO_VERSION))

// Full version string, also substituted at configure time.
#define TESSERACT_VERSION_STR "@PACKAGE_VERSION@"
|
||||
|
||||
// clang-format on
|
||||
|
||||
#endif // TESSERACT_API_VERSION_H_
|
Loading…
Reference in New Issue