tesseract-ocr优化文本方向识别速度

This commit is contained in:
luoliangyi 2023-02-27 17:55:16 +08:00
parent 2ac303d45a
commit e4990ab7e4
11 changed files with 5196 additions and 15 deletions

View File

@ -171,20 +171,7 @@ HGResult HGOCRTesseract::ImageTextDirectOCR(HGImage image, HGUInt* direct)
HGBase_GetImageDpi(image2, &xDpi, &yDpi); HGBase_GetImageDpi(image2, &xDpi, &yDpi);
TessBaseAPISetSourceResolution(m_baseApi, (xDpi + yDpi) / 2); TessBaseAPISetSourceResolution(m_baseApi, (xDpi + yDpi) / 2);
TessPageIterator* iter = TessBaseAPIAnalyseLayout(m_baseApi); int orientation = MyOSD(m_baseApi);
if (NULL == iter)
{
if (image2 != image)
HGBase_DestroyImage(image2);
return HGIMGPROC_ERR_OCR;
}
TessOrientation orientation;
TessWritingDirection writing_direction;
TessTextlineOrder textline_order;
float deskew_angle;
TessPageIteratorOrientation(iter, &orientation, &writing_direction, &textline_order, &deskew_angle);
if (TessOrientation::ORIENTATION_PAGE_UP == orientation) if (TessOrientation::ORIENTATION_PAGE_UP == orientation)
*direct = HGIMGPROC_OCRTEXTDIRECT_ORI; *direct = HGIMGPROC_OCRTEXTDIRECT_ORI;
else if (TessOrientation::ORIENTATION_PAGE_RIGHT == orientation) else if (TessOrientation::ORIENTATION_PAGE_RIGHT == orientation)
@ -194,7 +181,6 @@ HGResult HGOCRTesseract::ImageTextDirectOCR(HGImage image, HGUInt* direct)
else if (TessOrientation::ORIENTATION_PAGE_LEFT == orientation) else if (TessOrientation::ORIENTATION_PAGE_LEFT == orientation)
*direct = HGIMGPROC_OCRTEXTDIRECT_LEFT; *direct = HGIMGPROC_OCRTEXTDIRECT_LEFT;
TessPageIteratorDelete(iter);
if (image2 != image) if (image2 != image)
HGBase_DestroyImage(image2); HGBase_DestroyImage(image2);
return HGBASE_ERR_OK; return HGBASE_ERR_OK;

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,946 @@
///////////////////////////////////////////////////////////////////////
// File: baseapi.h
// Description: Simple API for calling tesseract.
// Author: Ray Smith
//
// (C) Copyright 2006, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_API_BASEAPI_H_
#define TESSERACT_API_BASEAPI_H_
#include <cstdio>
// To avoid collision with other typenames include the ABSOLUTE MINIMUM
// complexity of includes here. Use forward declarations wherever possible
// and hide includes of complex types in baseapi.cpp.
#include "apitypes.h"
#include "pageiterator.h"
#include "platform.h"
#include "publictypes.h"
#include "resultiterator.h"
#include "serialis.h"
#include "tess_version.h"
#include "tesscallback.h"
#include "thresholder.h"
#include "unichar.h"
template <typename T> class GenericVector;
class PAGE_RES;
class PAGE_RES_IT;
class ParagraphModel;
struct BlamerBundle;
class BLOCK_LIST;
class DENORM;
class MATRIX;
class ROW;
class STRING;
class WERD;
struct Pix;
struct Box;
struct Pixa;
struct Boxa;
class ETEXT_DESC;
struct OSResults;
class TBOX;
class UNICHARSET;
class WERD_CHOICE_LIST;
struct INT_FEATURE_STRUCT;
using INT_FEATURE = INT_FEATURE_STRUCT *;
struct TBLOB;
namespace tesseract {
class Dawg;
class Dict;
class EquationDetect;
class PageIterator;
class LTRResultIterator;
class ResultIterator;
class MutableIterator;
class TessResultRenderer;
class Tesseract;
class Trie;
class Wordrec;
using DictFunc = int (Dict::*)(void *, const UNICHARSET &, UNICHAR_ID, bool) const;
using ProbabilityInContextFunc = double (Dict::*)(const char *, const char *, int, const char *, int);
using ParamsModelClassifyFunc = float (Dict::*)(const char *, void *);
using FillLatticeFunc = void (Wordrec::*)(const MATRIX &, const WERD_CHOICE_LIST &, const UNICHARSET &, BlamerBundle *);
// Type of the truth callback that TessBaseAPI::InitTruthCallback() stores.
// Declared with `using` so it matches the other aliases in this header.
using TruthCallback =
    TessCallback4<const UNICHARSET &, int, PageIterator *, Pix *>;
/**
* Base class for all tesseract APIs.
* Specific classes can add ability to work on different inputs or produce
* different outputs.
* This class is mostly an interface layer on top of the Tesseract instance
* class to hide the data types so that users of this class don't have to
* include any other Tesseract headers.
*/
class TESS_API TessBaseAPI {
public:
TessBaseAPI();
virtual ~TessBaseAPI();
/**
 * NOTE(review): this declaration appears to be a project-local addition to
 * the Tesseract API. Presumably it returns the detected page orientation as
 * an int that callers compare against TessOrientation values — confirm
 * against the implementation in baseapi.cpp and document the error value.
 */
int MyOSD();
/**
* Returns the version identifier as a static string. Do not delete.
*/
static const char* Version();
/**
* If compiled with OpenCL AND an available OpenCL
* device is deemed faster than serial code, then
* "device" is populated with the cl_device_id
* and returns sizeof(cl_device_id)
* otherwise *device=nullptr and returns 0.
*/
static size_t getOpenCLDevice(void **device);
/**
* Writes the thresholded image to stderr as a PBM file on receipt of a
* SIGSEGV, SIGFPE, or SIGBUS signal. (Linux/Unix only).
*/
static void CatchSignals();
/**
* Set the name of the input file. Needed for training and
* reading a UNLV zone file, and for searchable PDF output.
*/
void SetInputName(const char* name);
/**
* These functions are required for searchable PDF output.
* We need our hands on the input file so that we can include
* it in the PDF without transcoding. If that is not possible,
* we need the original image. Finally, resolution metadata
* is stored in the PDF so we need that as well.
*/
const char* GetInputName();
// Takes ownership of the input pix.
void SetInputImage(Pix *pix);
Pix* GetInputImage();
int GetSourceYResolution();
const char* GetDatapath();
/** Set the name of the bonus output files. Needed only for debugging. */
void SetOutputName(const char* name);
/**
* Set the value of an internal "parameter."
* Supply the name of the parameter and the value as a string, just as
* you would in a config file.
* Returns false if the name lookup failed.
* Eg SetVariable("tessedit_char_blacklist", "xyz"); to ignore x, y and z.
* Or SetVariable("classify_bln_numeric_mode", "1"); to set numeric-only mode.
* SetVariable may be used before Init, but settings will revert to
* defaults on End().
*
* Note: Must be called after Init(). Only works for non-init variables
* (init variables should be passed to Init()).
*/
bool SetVariable(const char* name, const char* value);
bool SetDebugVariable(const char* name, const char* value);
/**
* Returns true if the parameter was found among Tesseract parameters.
* Fills in value with the value of the parameter.
*/
bool GetIntVariable(const char *name, int *value) const;
bool GetBoolVariable(const char *name, bool *value) const;
bool GetDoubleVariable(const char *name, double *value) const;
/**
* Returns the pointer to the string that represents the value of the
* parameter if it was found among Tesseract parameters.
*/
const char *GetStringVariable(const char *name) const;
/**
* Print Tesseract parameters to the given file.
*/
void PrintVariables(FILE *fp) const;
/**
* Get value of named variable as a string, if it exists.
*/
bool GetVariableAsString(const char *name, STRING *val);
/**
* Instances are now mostly thread-safe and totally independent,
* but some global parameters remain. Basically it is safe to use multiple
* TessBaseAPIs in different threads in parallel, UNLESS:
* you use SetVariable on some of the Params in classify and textord.
* If you do, then the effect will be to change it for all your instances.
*
* Start tesseract. Returns zero on success and -1 on failure.
* NOTE that the only members that may be called before Init are those
* listed above here in the class definition.
*
* The datapath must be the name of the tessdata directory.
* The language is (usually) an ISO 639-3 string or nullptr will default to eng.
* It is entirely safe (and eventually will be efficient too) to call
* Init multiple times on the same instance to change language, or just
* to reset the classifier.
* The language may be a string of the form [~]<lang>[+[~]<lang>]* indicating
* that multiple languages are to be loaded. Eg hin+eng will load Hindi and
* English. Languages may specify internally that they want to be loaded
* with one or more other languages, so the ~ sign is available to override
* that. Eg if hin were set to load eng by default, then hin+~eng would force
* loading only hin. The number of loaded languages is limited only by
* memory, with the caveat that loading additional languages will impact
* both speed and accuracy, as there is more work to do to decide on the
* applicable language, and there is more chance of hallucinating incorrect
* words.
* WARNING: On changing languages, all Tesseract parameters are reset
* back to their default values. (Which may vary between languages.)
* If you have a rare need to set a Variable that controls
* initialization for a second call to Init you should explicitly
* call End() and then use SetVariable before Init. This is only a very
* rare use case, since there are very few uses that require any parameters
* to be set before Init.
*
* If set_only_non_debug_params is true, only params that do not contain
* "debug" in the name will be set.
*/
int Init(const char* datapath, const char* language, OcrEngineMode mode,
char **configs, int configs_size,
const GenericVector<STRING> *vars_vec,
const GenericVector<STRING> *vars_values,
bool set_only_non_debug_params);
int Init(const char* datapath, const char* language, OcrEngineMode oem) {
return Init(datapath, language, oem, nullptr, 0, nullptr, nullptr, false);
}
int Init(const char* datapath, const char* language) {
return Init(datapath, language, OEM_DEFAULT, nullptr, 0, nullptr, nullptr, false);
}
// In-memory version reads the traineddata file directly from the given
// data[data_size] array, and/or reads data via a FileReader.
int Init(const char* data, int data_size, const char* language,
OcrEngineMode mode, char** configs, int configs_size,
const GenericVector<STRING>* vars_vec,
const GenericVector<STRING>* vars_values,
bool set_only_non_debug_params, FileReader reader);
/**
* Returns the languages string used in the last valid initialization.
* If the last initialization specified "deu+hin" then that will be
* returned. If hin loaded eng automatically as well, then that will
* not be included in this list. To find the languages actually
* loaded use GetLoadedLanguagesAsVector.
* The returned string should NOT be deleted.
*/
const char* GetInitLanguagesAsString() const;
/**
* Returns the loaded languages in the vector of STRINGs.
* Includes all languages loaded by the last Init, including those loaded
* as dependencies of other loaded languages.
*/
void GetLoadedLanguagesAsVector(GenericVector<STRING>* langs) const;
/**
* Returns the available languages in the sorted vector of STRINGs.
*/
void GetAvailableLanguagesAsVector(GenericVector<STRING>* langs) const;
/**
* Init only the lang model component of Tesseract. The only functions
* that work after this init are SetVariable and IsValidWord.
* WARNING: temporary! This function will be removed from here and placed
* in a separate API at some future time.
*/
int InitLangMod(const char* datapath, const char* language);
/**
* Init only for page layout analysis. Use only for calls to SetImage and
* AnalysePage. Calls that attempt recognition will generate an error.
*/
void InitForAnalysePage();
/**
* Read a "config" file containing a set of param, value pairs.
* Searches the standard places: tessdata/configs, tessdata/tessconfigs
* and also accepts a relative or absolute path name.
* Note: only non-init params will be set (init params are set by Init()).
*/
void ReadConfigFile(const char* filename);
/** Same as above, but only set debug params from the given config file. */
void ReadDebugConfigFile(const char* filename);
/**
* Set the current page segmentation mode. Defaults to PSM_SINGLE_BLOCK.
* The mode is stored as an IntParam so it can also be modified by
* ReadConfigFile or SetVariable("tessedit_pageseg_mode", mode as string).
*/
void SetPageSegMode(PageSegMode mode);
/** Return the current page segmentation mode. */
PageSegMode GetPageSegMode() const;
/**
* Recognize a rectangle from an image and return the result as a string.
* May be called many times for a single Init.
* Currently has no error checking.
* Greyscale of 8 and color of 24 or 32 bits per pixel may be given.
* Palette color images will not work properly and must be converted to
* 24 bit.
* Binary images of 1 bit per pixel may also be given but they must be
* byte packed with the MSB of the first byte being the first pixel, and a
* 1 represents WHITE. For binary images set bytes_per_pixel=0.
* The recognized text is returned as a char* which is coded
* as UTF8 and must be freed with the delete [] operator.
*
* Note that TesseractRect is the simplified convenience interface.
* For advanced uses, use SetImage, (optionally) SetRectangle, Recognize,
* and one or more of the Get*Text functions below.
*/
char* TesseractRect(const unsigned char* imagedata,
int bytes_per_pixel, int bytes_per_line,
int left, int top, int width, int height);
/**
* Call between pages or documents etc to free up memory and forget
* adaptive data.
*/
void ClearAdaptiveClassifier();
/**
* @defgroup AdvancedAPI Advanced API
* The following methods break TesseractRect into pieces, so you can
* get hold of the thresholded image, get the text in different formats,
* get bounding boxes, confidences etc.
*/
/* @{ */
/**
* Provide an image for Tesseract to recognize. Format is as
* TesseractRect above. Copies the image buffer and converts to Pix.
* SetImage clears all recognition results, and sets the rectangle to the
* full image, so it may be followed immediately by a GetUTF8Text, and it
* will automatically perform recognition.
*/
void SetImage(const unsigned char* imagedata, int width, int height,
int bytes_per_pixel, int bytes_per_line);
/**
* Provide an image for Tesseract to recognize. As with SetImage above,
* Tesseract takes its own copy of the image, so it need not persist until
* after Recognize.
* Pix vs raw, which to use?
* Use Pix where possible. Tesseract uses Pix as its internal representation
* and it is therefore more efficient to provide a Pix directly.
*/
void SetImage(Pix* pix);
/**
* Set the resolution of the source image in pixels per inch so font size
* information can be calculated in results. Call this after SetImage().
*/
void SetSourceResolution(int ppi);
/**
* Restrict recognition to a sub-rectangle of the image. Call after SetImage.
* Each SetRectangle clears the recognition results so multiple rectangles
* can be recognized with the same image.
*/
void SetRectangle(int left, int top, int width, int height);
/**
 * In extreme cases only, usually with a subclass of Thresholder, it
 * is possible to provide a different Thresholder. The Thresholder may
 * be preloaded with an image, settings etc, or they may be set after.
 * Note that Tesseract takes ownership of the Thresholder and will
 * delete it when it is replaced or the API is destructed.
 * Passing the Thresholder that is already installed keeps it alive; in
 * that case only the results are cleared.
 */
void SetThresholder(ImageThresholder* thresholder) {
  // Guard against self-assignment: deleting thresholder_ and then storing
  // the very same pointer would leave the API holding a dangling pointer.
  if (thresholder != thresholder_) {
    delete thresholder_;
    thresholder_ = thresholder;
  }
  ClearResults();
}
/**
* Get a copy of the internal thresholded image from Tesseract.
* Caller takes ownership of the Pix and must pixDestroy it.
* May be called any time after SetImage, or after TesseractRect.
*/
Pix* GetThresholdedImage();
/**
* Get the result of page layout analysis as a leptonica-style
* Boxa, Pixa pair, in reading order.
* Can be called before or after Recognize.
*/
Boxa* GetRegions(Pixa** pixa);
/**
* Get the textlines as a leptonica-style
* Boxa, Pixa pair, in reading order.
* Can be called before or after Recognize.
* If raw_image is true, then extract from the original image instead of the
* thresholded image and pad by raw_padding pixels.
* If blockids is not nullptr, the block-id of each line is also returned as an
* array of one element per line. delete [] after use.
* If paraids is not nullptr, the paragraph-id of each line within its block is
* also returned as an array of one element per line. delete [] after use.
*/
Boxa* GetTextlines(bool raw_image, int raw_padding,
Pixa** pixa, int** blockids, int** paraids);
/**
 * Convenience overload for the most common usage: extracts the textlines
 * from the thresholded image, with no padding and no paragraph ids.
 */
Boxa* GetTextlines(Pixa** pixa, int** blockids) {
  const bool from_raw_image = false;
  const int padding = 0;
  int** const no_paraids = nullptr;
  return GetTextlines(from_raw_image, padding, pixa, blockids, no_paraids);
}
/**
* Get textlines and strips of image regions as a leptonica-style Boxa, Pixa
* pair, in reading order. Enables downstream handling of non-rectangular
* regions.
* Can be called before or after Recognize.
* If blockids is not nullptr, the block-id of each line is also returned as an
* array of one element per line. delete [] after use.
*/
Boxa* GetStrips(Pixa** pixa, int** blockids);
/**
* Get the words as a leptonica-style
* Boxa, Pixa pair, in reading order.
* Can be called before or after Recognize.
*/
Boxa* GetWords(Pixa** pixa);
/**
* Gets the individual connected (text) components (created
* after pages segmentation step, but before recognition)
* as a leptonica-style Boxa, Pixa pair, in reading order.
* Can be called before or after Recognize.
* Note: the caller is responsible for calling boxaDestroy()
* on the returned Boxa array and pixaDestroy() on cc array.
*/
Boxa* GetConnectedComponents(Pixa** cc);
/**
* Get the given level kind of components (block, textline, word etc.) as a
* leptonica-style Boxa, Pixa pair, in reading order.
* Can be called before or after Recognize.
* If blockids is not nullptr, the block-id of each component is also returned
* as an array of one element per component. delete [] after use.
* If paraids is not nullptr, the paragraph-id of each component within its block
* is also returned as an array of one element per component. delete [] after
* use.
* If raw_image is true, then portions of the original image are extracted
* instead of the thresholded image and padded with raw_padding.
* If text_only is true, then only text components are returned.
*/
Boxa* GetComponentImages(PageIteratorLevel level,
bool text_only, bool raw_image,
int raw_padding,
Pixa** pixa, int** blockids, int** paraids);
// Convenience overload for the most common usage: component images taken
// from the thresholded (binary) image, with no padding and no paragraph ids.
Boxa* GetComponentImages(const PageIteratorLevel level,
                         const bool text_only,
                         Pixa** pixa, int** blockids) {
  const bool from_raw_image = false;
  const int padding = 0;
  int** const no_paraids = nullptr;
  return GetComponentImages(level, text_only, from_raw_image, padding,
                            pixa, blockids, no_paraids);
}
/**
* Returns the scale factor of the thresholded image that would be returned by
* GetThresholdedImage() and the various GetX() methods that call
* GetComponentImages().
* Returns 0 if no thresholder has been set.
*/
int GetThresholdedImageScaleFactor() const;
/**
* Runs page layout analysis in the mode set by SetPageSegMode.
* May optionally be called prior to Recognize to get access to just
* the page layout results. Returns an iterator to the results.
* If merge_similar_words is true, words are combined where suitable for use
* with a line recognizer. Use if you want to use AnalyseLayout to find the
* textlines, and then want to process textline fragments with an external
* line recognizer.
* Returns nullptr on error or an empty page.
* The returned iterator must be deleted after use.
* WARNING! This class points to data held within the TessBaseAPI class, and
* therefore can only be used while the TessBaseAPI class still exists and
* has not been subjected to a call of Init, SetImage, Recognize, Clear, End
* DetectOS, or anything else that changes the internal PAGE_RES.
*/
PageIterator* AnalyseLayout();
PageIterator* AnalyseLayout(bool merge_similar_words);
/**
* Recognize the image from SetAndThresholdImage, generating Tesseract
* internal structures. Returns 0 on success.
* Optional. The Get*Text functions below will call Recognize if needed.
* After Recognize, the output is kept internally until the next SetImage.
*/
int Recognize(ETEXT_DESC* monitor);
/**
* Methods to retrieve information after SetAndThresholdImage(),
* Recognize() or TesseractRect(). (Recognize is called implicitly if needed.)
*/
#ifndef DISABLED_LEGACY_ENGINE
/** Variant on Recognize used for testing chopper. */
int RecognizeForChopTest(ETEXT_DESC* monitor);
#endif
/**
* Turns images into symbolic text.
*
* filename can point to a single image, a multi-page TIFF,
* or a plain text list of image filenames.
*
* retry_config is useful for debugging. If not nullptr, you can fall
* back to an alternate configuration if a page fails for some
* reason.
*
* timeout_millisec terminates processing if any single page
* takes too long. Set to 0 for unlimited time.
*
* renderer is responsible for creating the output. For example,
* use the TessTextRenderer if you want plaintext output, or
* the TessPDFRenderer to produce searchable PDF.
*
* If tessedit_page_number is non-negative, will only process that
* single page. Works for multi-page tiff file, or filelist.
*
* Returns true if successful, false on error.
*/
bool ProcessPages(const char* filename, const char* retry_config,
int timeout_millisec, TessResultRenderer* renderer);
// Does the real work of ProcessPages.
bool ProcessPagesInternal(const char* filename, const char* retry_config,
int timeout_millisec, TessResultRenderer* renderer);
/**
* Turn a single image into symbolic text.
*
* The pix is the image processed. filename and page_index are
* metadata used by side-effect processes, such as reading a box
* file or formatting as hOCR.
*
* See ProcessPages for descriptions of other parameters.
*/
bool ProcessPage(Pix* pix, int page_index, const char* filename,
const char* retry_config, int timeout_millisec,
TessResultRenderer* renderer);
/**
* Get a reading-order iterator to the results of LayoutAnalysis and/or
* Recognize. The returned iterator must be deleted after use.
* WARNING! This class points to data held within the TessBaseAPI class, and
* therefore can only be used while the TessBaseAPI class still exists and
* has not been subjected to a call of Init, SetImage, Recognize, Clear, End
* DetectOS, or anything else that changes the internal PAGE_RES.
*/
ResultIterator* GetIterator();
/**
* Get a mutable iterator to the results of LayoutAnalysis and/or Recognize.
* The returned iterator must be deleted after use.
* WARNING! This class points to data held within the TessBaseAPI class, and
* therefore can only be used while the TessBaseAPI class still exists and
* has not been subjected to a call of Init, SetImage, Recognize, Clear, End
* DetectOS, or anything else that changes the internal PAGE_RES.
*/
MutableIterator* GetMutableIterator();
/**
* The recognized text is returned as a char* which is coded
* as UTF8 and must be freed with the delete [] operator.
*/
char* GetUTF8Text();
/**
* Make a HTML-formatted string with hOCR markup from the internal
* data structures.
* page_number is 0-based but will appear in the output as 1-based.
* monitor can be used to
* cancel the recognition
* receive progress callbacks
* Returned string must be freed with the delete [] operator.
*/
char* GetHOCRText(ETEXT_DESC* monitor, int page_number);
/**
* Make a HTML-formatted string with hOCR markup from the internal
* data structures.
* page_number is 0-based but will appear in the output as 1-based.
* Returned string must be freed with the delete [] operator.
*/
char* GetHOCRText(int page_number);
/**
* Make an XML-formatted string with Alto markup from the internal
* data structures.
*/
char* GetAltoText(ETEXT_DESC* monitor, int page_number);
/**
* Make an XML-formatted string with Alto markup from the internal
* data structures.
*/
char* GetAltoText(int page_number);
/**
* Make a TSV-formatted string from the internal data structures.
* page_number is 0-based but will appear in the output as 1-based.
* Returned string must be freed with the delete [] operator.
*/
char* GetTSVText(int page_number);
/**
* Make a box file for LSTM training from the internal data structures.
* Constructs coordinates in the original image - not just the rectangle.
* page_number is a 0-based page index that will appear in the box file.
* Returned string must be freed with the delete [] operator.
*/
char* GetLSTMBoxText(int page_number);
/**
* The recognized text is returned as a char* which is coded in the same
* format as a box file used in training.
* Constructs coordinates in the original image - not just the rectangle.
* page_number is a 0-based page index that will appear in the box file.
* Returned string must be freed with the delete [] operator.
*/
char* GetBoxText(int page_number);
/**
* The recognized text is returned as a char* which is coded in the same
* format as a WordStr box file used in training.
* page_number is a 0-based page index that will appear in the box file.
* Returned string must be freed with the delete [] operator.
*/
char* GetWordStrBoxText(int page_number);
/**
* The recognized text is returned as a char* which is coded
* as UNLV format Latin-1 with specific reject and suspect codes.
* Returned string must be freed with the delete [] operator.
*/
char* GetUNLVText();
/**
* Detect the orientation of the input image and apparent script (alphabet).
* orient_deg is the detected clockwise rotation of the input image in degrees
* (0, 90, 180, 270)
* orient_conf is the confidence (15.0 is reasonably confident)
* script_name is an ASCII string, the name of the script, e.g. "Latin"
* script_conf is confidence level in the script
* Returns true on success and writes values to each parameter as an output
*/
bool DetectOrientationScript(int* orient_deg, float* orient_conf,
const char** script_name, float* script_conf);
/**
* The recognized text is returned as a char* which is coded
* as UTF8 and must be freed with the delete [] operator.
* page_number is a 0-based page index that will appear in the osd file.
*/
char* GetOsdText(int page_number);
/** Returns the (average) confidence value between 0 and 100. */
int MeanTextConf();
/**
* Returns all word confidences (between 0 and 100) in an array, terminated
* by -1. The calling function must delete [] after use.
* The number of confidences should correspond to the number of space-
* delimited words in GetUTF8Text.
*/
int* AllWordConfidences();
#ifndef DISABLED_LEGACY_ENGINE
/**
* Applies the given word to the adaptive classifier if possible.
* The word must be SPACE-DELIMITED UTF-8 - l i k e t h i s , so it can
* tell the boundaries of the graphemes.
* Assumes that SetImage/SetRectangle have been used to set the image
* to the given word. The mode arg should be PSM_SINGLE_WORD or
* PSM_CIRCLE_WORD, as that will be used to control layout analysis.
* The currently set PageSegMode is preserved.
* Returns false if adaption was not possible for some reason.
*/
bool AdaptToWordStr(PageSegMode mode, const char* wordstr);
#endif // ndef DISABLED_LEGACY_ENGINE
/**
* Free up recognition results and any stored image data, without actually
* freeing any recognition data that would be time-consuming to reload.
* Afterwards, you must call SetImage or TesseractRect before doing
* any Recognize or Get* operation.
*/
void Clear();
/**
* Close down tesseract and free up all memory. End() is equivalent to
* destructing and reconstructing your TessBaseAPI.
* Once End() has been used, none of the other API functions may be used
* other than Init and anything declared above it in the class definition.
*/
void End();
/**
* Clear any library-level memory caches.
* There are a variety of expensive-to-load constant data structures (mostly
* language dictionaries) that are cached globally -- surviving the Init()
* and End() of individual TessBaseAPI's. This function allows the clearing
* of these caches.
**/
static void ClearPersistentCache();
/**
* Check whether a word is valid according to Tesseract's language model
* @return 0 if the word is invalid, non-zero if valid.
* @warning temporary! This function will be removed from here and placed
* in a separate API at some future time.
*/
int IsValidWord(const char *word);
// Returns true if utf8_character is defined in the UniCharset.
bool IsValidCharacter(const char *utf8_character);
bool GetTextDirection(int* out_offset, float* out_slope);
/** Sets Dict::letter_is_okay_ function to point to the given function. */
void SetDictFunc(DictFunc f);
/** Sets Dict::probability_in_context_ function to point to the given
* function.
*/
void SetProbabilityInContextFunc(ProbabilityInContextFunc f);
/**
* Estimates the Orientation And Script of the image.
* @return true if the image was processed successfully.
*/
bool DetectOS(OSResults*);
/**
* Return text orientation of each block as determined by an earlier run
* of layout analysis.
*/
void GetBlockTextOrientations(int** block_orientation,
bool** vertical_writing);
#ifndef DISABLED_LEGACY_ENGINE
/** Sets Wordrec::fill_lattice_ function to point to the given function. */
void SetFillLatticeFunc(FillLatticeFunc f);
/** Find lines from the image making the BLOCK_LIST. */
BLOCK_LIST* FindLinesCreateBlockList();
/**
* Delete a block list.
* This is to keep BLOCK_LIST pointer opaque
* and let go of including the other headers.
*/
static void DeleteBlockList(BLOCK_LIST* block_list);
/** Returns a ROW object created from the input row specification. */
static ROW *MakeTessOCRRow(float baseline, float xheight,
float descender, float ascender);
/** Returns a TBLOB corresponding to the entire input image. */
static TBLOB *MakeTBLOB(Pix *pix);
/**
* This method baseline normalizes a TBLOB in-place. The input row is used
* for normalization. The denorm is an optional parameter in which the
* normalization-antidote is returned.
*/
static void NormalizeTBLOB(TBLOB *tblob, ROW *row, bool numeric_mode);
/** This method returns the features associated with the input image. */
void GetFeaturesForBlob(TBLOB* blob, INT_FEATURE_STRUCT* int_features,
int* num_features, int* feature_outline_index);
/**
* This method returns the row to which a box of specified dimensions would
* belong. If no good match is found, it returns nullptr.
*/
static ROW* FindRowForBox(BLOCK_LIST* blocks, int left, int top,
int right, int bottom);
/**
* Method to run adaptive classifier on a blob.
* It returns at max num_max_matches results.
*/
void RunAdaptiveClassifier(TBLOB* blob,
int num_max_matches,
int* unichar_ids,
float* ratings,
int* num_matches_returned);
#endif // ndef DISABLED_LEGACY_ENGINE
/** This method returns the string form of the specified unichar. */
const char* GetUnichar(int unichar_id);
/** Return the pointer to the i-th dawg loaded into tesseract_ object. */
const Dawg *GetDawg(int i) const;
/** Return the number of dawgs loaded into tesseract_ object. */
int NumDawgs() const;
/** Returns tesseract_, the underlying Tesseract data object. */
Tesseract* tesseract() const { return tesseract_; }
/** Returns last_oem_requested_, the OCR engine mode last requested. */
OcrEngineMode oem() const { return last_oem_requested_; }
/** Stores cb in truth_cb_, the callback used for setting truth_* in WERD_RES. */
void InitTruthCallback(TruthCallback *cb) { truth_cb_ = cb; }
void set_min_orientation_margin(double margin);
/* @} */
protected:
/** Common code for setting the image. Returns true if Init has been called. */
TESS_LOCAL bool InternalSetImage();
/**
* Run the thresholder to make the thresholded image. If pix is not nullptr,
* the source is thresholded to pix instead of the internal IMAGE.
*/
TESS_LOCAL virtual bool Threshold(Pix** pix);
/**
* Find lines from the image making the BLOCK_LIST.
* @return 0 on success.
*/
TESS_LOCAL int FindLines();
/** Delete the pageres and block list ready for a new page. */
void ClearResults();
/**
* Return an LTR Result Iterator -- used only for training, as we really want
* to ignore all BiDi smarts at that point.
* delete once you're done with it.
*/
TESS_LOCAL LTRResultIterator* GetLTRIterator();
/**
* Return the length of the output text string, as UTF8, assuming
* one newline per line and one per block, with a terminator,
* and assuming a single character reject marker for each rejected character.
* Also return the number of recognized blobs in blob_count.
*/
TESS_LOCAL int TextLength(int* blob_count);
//// paragraphs.cpp ////////////////////////////////////////////////////
TESS_LOCAL void DetectParagraphs(bool after_text_recognition);
#ifndef DISABLED_LEGACY_ENGINE
/** @defgroup ocropusAddOns ocropus add-ons */
/* @{ */
/**
* Adapt to recognize the current image as the given character.
* The image must be preloaded and be just an image of a single character.
*/
TESS_LOCAL void AdaptToCharacter(const char *unichar_repr,
int length,
float baseline,
float xheight,
float descender,
float ascender);
/** Recognize text doing one pass only, using settings for a given pass. */
TESS_LOCAL PAGE_RES* RecognitionPass1(BLOCK_LIST* block_list);
TESS_LOCAL PAGE_RES* RecognitionPass2(BLOCK_LIST* block_list,
PAGE_RES* pass1_result);
/**
* Extract the OCR results, costs (penalty points for uncertainty),
* and the bounding boxes of the characters.
*/
TESS_LOCAL static int TesseractExtractResult(char** text,
int** lengths,
float** costs,
int** x0,
int** y0,
int** x1,
int** y1,
PAGE_RES* page_res);
/** Read-only access to page_res_, the page-level data held by this object. */
TESS_LOCAL const PAGE_RES* GetPageRes() const { return page_res_; }
/* @} */
#endif // ndef DISABLED_LEGACY_ENGINE
protected:
Tesseract* tesseract_; ///< The underlying data object.
Tesseract* osd_tesseract_; ///< For orientation & script detection.
EquationDetect* equ_detect_; ///< The equation detector.
FileReader reader_; ///< Reads files from any filesystem.
ImageThresholder* thresholder_; ///< Image thresholding module.
// NOTE(review): undocumented upstream; presumably the paragraph models
// produced by DetectParagraphs() -- confirm against baseapi.cpp.
GenericVector<ParagraphModel *>* paragraph_models_;
BLOCK_LIST* block_list_; ///< The page layout.
PAGE_RES* page_res_; ///< The page-level data.
STRING* input_file_; ///< Name used by training code.
STRING* output_file_; ///< Name used by debug code.
STRING* datapath_; ///< Current location of tessdata.
STRING* language_; ///< Last initialized language.
OcrEngineMode last_oem_requested_; ///< Last ocr language mode requested.
bool recognition_done_; ///< page_res_ contains recognition data.
TruthCallback *truth_cb_; ///< Function for setting truth_* in WERD_RES.
/**
* @defgroup ThresholderParams Thresholder Parameters
* Parameters saved from the Thresholder. Needed to rebuild coordinates.
*/
/* @{ */
int rect_left_;
int rect_top_;
int rect_width_;
int rect_height_;
int image_width_;
int image_height_;
/* @} */
private:
// A list of image filenames gets special consideration
bool ProcessPagesFileList(FILE *fp,
STRING *buf,
const char* retry_config, int timeout_millisec,
TessResultRenderer* renderer,
int tessedit_page_number);
// TIFF supports multipage so gets special consideration.
bool ProcessPagesMultipageTiff(const unsigned char *data,
size_t size,
const char* filename,
const char* retry_config,
int timeout_millisec,
TessResultRenderer* renderer,
int tessedit_page_number);
// There's currently no way to pass a document title from the
// Tesseract command line, and we have multiple places that choose
// to set the title to an empty string. Using a single named
// variable will hopefully reduce confusion if the situation changes
// in the future.
const char *unknown_title_ = "";
}; // class TessBaseAPI.
/** Escape a char string - replace &<>"' with HTML codes. */
STRING HOcrEscape(const char* text);
} // namespace tesseract.
#endif // TESSERACT_API_BASEAPI_H_

View File

@ -0,0 +1,919 @@
///////////////////////////////////////////////////////////////////////
// File: capi.cpp
// Description: C-API TessBaseAPI
//
// (C) Copyright 2012, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#ifndef TESS_CAPI_INCLUDE_BASEAPI
# define TESS_CAPI_INCLUDE_BASEAPI
#endif
#include "capi.h"
#include "genericvector.h"
#include "strngs.h"
/**
 * C entry point that forwards to TessBaseAPI::MyOSD() and hands back its
 * integer result unchanged.
 * NOTE(review): unlike every other export in this file this one carries no
 * TESS_CALL; any change must be mirrored in the declaration in capi.h.
 */
TESS_API int MyOSD(TessBaseAPI* api) {
  const int osd_result = api->MyOSD();
  return osd_result;
}
/** Returns the string reported by TessBaseAPI::Version(). */
TESS_API const char* TESS_CALL TessVersion() {
  const char* version = TessBaseAPI::Version();
  return version;
}
/** Releases a character buffer with delete[]; a null pointer is a no-op. */
TESS_API void TESS_CALL
TessDeleteText(const char* text) {
  delete[] text;
}
/**
 * Frees a nullptr-terminated array of strings: every element is released with
 * delete[], then the array itself.
 *
 * A null arr is accepted and ignored, matching TessDeleteText() and
 * TessDeleteIntArray(), which are no-ops for null because they only delete[].
 */
TESS_API void TESS_CALL TessDeleteTextArray(char** arr) {
  if (arr == nullptr) {
    return;  // Nothing to free; avoids dereferencing a null array pointer.
  }
  for (char** pos = arr; *pos != nullptr; ++pos) {
    delete[] *pos;
  }
  delete[] arr;
}
/** Releases an int array with delete[]; a null pointer is a no-op. */
TESS_API void TESS_CALL
TessDeleteIntArray(const int* arr) {
  delete[] arr;
}
#ifndef DISABLED_LEGACY_ENGINE
/** Hands block_list to TessBaseAPI::DeleteBlockList() for disposal. */
TESS_API void TESS_CALL
TessDeleteBlockList(BLOCK_LIST* block_list) {
  TessBaseAPI::DeleteBlockList(block_list);
}
#endif
/** Allocates a TessTextRenderer for outputbase with new. */
TESS_API TessResultRenderer* TESS_CALL
TessTextRendererCreate(const char* outputbase) {
  TessResultRenderer* renderer = new TessTextRenderer(outputbase);
  return renderer;
}
/** Allocates a TessHOcrRenderer for outputbase with new. */
TESS_API TessResultRenderer* TESS_CALL
TessHOcrRendererCreate(const char* outputbase) {
  TessResultRenderer* renderer = new TessHOcrRenderer(outputbase);
  return renderer;
}
/**
 * Allocates a TessHOcrRenderer for outputbase with new; font_info is passed
 * on as a bool (any non-zero value means true).
 */
TESS_API TessResultRenderer* TESS_CALL
TessHOcrRendererCreate2(const char* outputbase, BOOL font_info) {
  const bool with_font_info = (font_info != 0);
  TessResultRenderer* renderer =
      new TessHOcrRenderer(outputbase, with_font_info);
  return renderer;
}
/** Allocates a TessAltoRenderer for outputbase with new. */
TESS_API TessResultRenderer* TESS_CALL
TessAltoRendererCreate(const char* outputbase) {
  TessResultRenderer* renderer = new TessAltoRenderer(outputbase);
  return renderer;
}
/** Allocates a TessTsvRenderer for outputbase with new. */
TESS_API TessResultRenderer* TESS_CALL
TessTsvRendererCreate(const char* outputbase) {
  TessResultRenderer* renderer = new TessTsvRenderer(outputbase);
  return renderer;
}
/**
 * Allocates a TessPDFRenderer with new from outputbase and datadir; textonly
 * is passed on as a bool (any non-zero value means true).
 */
TESS_API TessResultRenderer* TESS_CALL TessPDFRendererCreate(
    const char* outputbase, const char* datadir, BOOL textonly) {
  const bool text_only_flag = (textonly != 0);
  TessResultRenderer* renderer =
      new TessPDFRenderer(outputbase, datadir, text_only_flag);
  return renderer;
}
/** Allocates a TessUnlvRenderer for outputbase with new. */
TESS_API TessResultRenderer* TESS_CALL
TessUnlvRendererCreate(const char* outputbase) {
  TessResultRenderer* renderer = new TessUnlvRenderer(outputbase);
  return renderer;
}
/** Allocates a TessBoxTextRenderer for outputbase with new. */
TESS_API TessResultRenderer* TESS_CALL
TessBoxTextRendererCreate(const char* outputbase) {
  TessResultRenderer* renderer = new TessBoxTextRenderer(outputbase);
  return renderer;
}
/** Allocates a TessWordStrBoxRenderer for outputbase with new. */
TESS_API TessResultRenderer* TESS_CALL
TessWordStrBoxRendererCreate(const char* outputbase) {
  TessResultRenderer* renderer = new TessWordStrBoxRenderer(outputbase);
  return renderer;
}
/** Allocates a TessLSTMBoxRenderer for outputbase with new. */
TESS_API TessResultRenderer* TESS_CALL
TessLSTMBoxRendererCreate(const char* outputbase) {
  TessResultRenderer* renderer = new TessLSTMBoxRenderer(outputbase);
  return renderer;
}
/** Destroys a renderer with delete; a null pointer is a no-op. */
TESS_API void TESS_CALL
TessDeleteResultRenderer(TessResultRenderer* renderer) {
  delete renderer;
}
/** Forwards to renderer->insert(next). */
TESS_API void TESS_CALL TessResultRendererInsert(TessResultRenderer* renderer,
                                                 TessResultRenderer* next) {
  TessResultRenderer& target = *renderer;
  target.insert(next);
}
/** Returns whatever renderer->next() reports. */
TESS_API TessResultRenderer* TESS_CALL
TessResultRendererNext(TessResultRenderer* renderer) {
  TessResultRenderer* following = renderer->next();
  return following;
}
/** Calls renderer->BeginDocument(title) and returns its result as a BOOL. */
TESS_API BOOL TESS_CALL TessResultRendererBeginDocument(
    TessResultRenderer* renderer, const char* title) {
  const BOOL started = static_cast<int>(renderer->BeginDocument(title));
  return started;
}
/** Calls renderer->AddImage(api) and returns its result as a BOOL. */
TESS_API BOOL TESS_CALL TessResultRendererAddImage(TessResultRenderer* renderer,
                                                   TessBaseAPI* api) {
  const BOOL added = static_cast<int>(renderer->AddImage(api));
  return added;
}
/** Calls renderer->EndDocument() and returns its result as a BOOL. */
TESS_API BOOL TESS_CALL
TessResultRendererEndDocument(TessResultRenderer* renderer) {
  const BOOL finished = static_cast<int>(renderer->EndDocument());
  return finished;
}
/**
 * Returns renderer->file_extension(). The misspelt function name is part of
 * the exported interface and is kept as is.
 */
TESS_API const char* TESS_CALL
TessResultRendererExtention(TessResultRenderer* renderer) {
  const char* extension = renderer->file_extension();
  return extension;
}
/** Returns renderer->title(). */
TESS_API const char* TESS_CALL
TessResultRendererTitle(TessResultRenderer* renderer) {
  const char* title = renderer->title();
  return title;
}
/** Returns renderer->imagenum(). */
TESS_API int TESS_CALL
TessResultRendererImageNum(TessResultRenderer* renderer) {
  const int image_number = renderer->imagenum();
  return image_number;
}
/** Allocates a default-constructed TessBaseAPI with new. */
TESS_API TessBaseAPI* TESS_CALL TessBaseAPICreate() {
  TessBaseAPI* handle = new TessBaseAPI;
  return handle;
}
/** Destroys a TessBaseAPI with delete; a null pointer is a no-op. */
TESS_API void TESS_CALL
TessBaseAPIDelete(TessBaseAPI* handle) {
  delete handle;
}
/**
 * Returns TessBaseAPI::getOpenCLDevice(device). The handle argument is
 * unused because the underlying call is static.
 */
TESS_API size_t TESS_CALL TessBaseAPIGetOpenCLDevice(TessBaseAPI* /*handle*/,
                                                     void** device) {
  const size_t result = TessBaseAPI::getOpenCLDevice(device);
  return result;
}
/** Forwards to handle->SetInputName(name). */
TESS_API void TESS_CALL TessBaseAPISetInputName(TessBaseAPI* handle,
                                                const char* name) {
  TessBaseAPI& api = *handle;
  api.SetInputName(name);
}
/** Returns handle->GetInputName(). */
TESS_API const char* TESS_CALL TessBaseAPIGetInputName(TessBaseAPI* handle) {
  const char* input_name = handle->GetInputName();
  return input_name;
}
/** Forwards to handle->SetInputImage(pix). */
TESS_API void TESS_CALL TessBaseAPISetInputImage(TessBaseAPI* handle,
                                                 Pix* pix) {
  TessBaseAPI& api = *handle;
  api.SetInputImage(pix);
}
/** Returns handle->GetInputImage(). */
TESS_API Pix* TESS_CALL TessBaseAPIGetInputImage(TessBaseAPI* handle) {
  Pix* input_image = handle->GetInputImage();
  return input_image;
}
/** Returns handle->GetSourceYResolution(). */
TESS_API int TESS_CALL TessBaseAPIGetSourceYResolution(TessBaseAPI* handle) {
  const int y_resolution = handle->GetSourceYResolution();
  return y_resolution;
}
/** Returns handle->GetDatapath(). */
TESS_API const char* TESS_CALL TessBaseAPIGetDatapath(TessBaseAPI* handle) {
  const char* datapath = handle->GetDatapath();
  return datapath;
}
/** Forwards to handle->SetOutputName(name). */
TESS_API void TESS_CALL TessBaseAPISetOutputName(TessBaseAPI* handle,
                                                 const char* name) {
  TessBaseAPI& api = *handle;
  api.SetOutputName(name);
}
/** Calls handle->SetVariable(name, value) and returns its result as a BOOL. */
TESS_API BOOL TESS_CALL TessBaseAPISetVariable(TessBaseAPI* handle,
                                               const char* name,
                                               const char* value) {
  const BOOL was_set = static_cast<int>(handle->SetVariable(name, value));
  return was_set;
}
/**
 * Calls handle->SetDebugVariable(name, value) and returns its result as a
 * BOOL.
 */
TESS_API BOOL TESS_CALL TessBaseAPISetDebugVariable(TessBaseAPI* handle,
                                                    const char* name,
                                                    const char* value) {
  const BOOL was_set = static_cast<int>(handle->SetDebugVariable(name, value));
  return was_set;
}
/**
 * Calls handle->GetIntVariable(name, value) and returns its result as a BOOL.
 * value is handed to the API untouched.
 */
TESS_API BOOL TESS_CALL TessBaseAPIGetIntVariable(const TessBaseAPI* handle,
                                                  const char* name,
                                                  int* value) {
  const BOOL found = static_cast<int>(handle->GetIntVariable(name, value));
  return found;
}
/**
 * Looks a bool variable up via handle->GetBoolVariable() and converts it to
 * the C BOOL type. *value is written only when the lookup reports success;
 * the return value is that success flag converted to int.
 */
TESS_API BOOL TESS_CALL TessBaseAPIGetBoolVariable(const TessBaseAPI* handle,
                                                   const char* name,
                                                   BOOL* value) {
  bool native_value;
  const bool found = handle->GetBoolVariable(name, &native_value);
  if (!found) {
    return static_cast<int>(false);
  }
  *value = static_cast<int>(native_value);
  return static_cast<int>(true);
}
/**
 * Calls handle->GetDoubleVariable(name, value) and returns its result as a
 * BOOL. value is handed to the API untouched.
 */
TESS_API BOOL TESS_CALL TessBaseAPIGetDoubleVariable(const TessBaseAPI* handle,
                                                     const char* name,
                                                     double* value) {
  const BOOL found = static_cast<int>(handle->GetDoubleVariable(name, value));
  return found;
}
/** Returns handle->GetStringVariable(name). */
TESS_API const char* TESS_CALL
TessBaseAPIGetStringVariable(const TessBaseAPI* handle, const char* name) {
  const char* text = handle->GetStringVariable(name);
  return text;
}
/** Forwards to handle->PrintVariables(fp). */
TESS_API void TESS_CALL TessBaseAPIPrintVariables(const TessBaseAPI* handle,
                                                  FILE* fp) {
  const TessBaseAPI& api = *handle;
  api.PrintVariables(fp);
}
/**
 * Opens filename for writing ("w"), lets handle->PrintVariables() write to
 * it and closes the file again.
 * @return TRUE when the file could be opened, FALSE otherwise.
 */
TESS_API BOOL TESS_CALL TessBaseAPIPrintVariablesToFile(
    const TessBaseAPI* handle, const char* filename) {
  FILE* out = fopen(filename, "w");
  if (out == nullptr) {
    return FALSE;
  }
  handle->PrintVariables(out);
  fclose(out);
  return TRUE;
}
/**
 * Calls handle->GetVariableAsString(name, val) and returns its result as a
 * BOOL.
 */
TESS_API BOOL TESS_CALL TessBaseAPIGetVariableAsString(TessBaseAPI* handle,
                                                       const char* name,
                                                       STRING* val) {
  const BOOL found = static_cast<int>(handle->GetVariableAsString(name, val));
  return found;
}
/**
 * Full-featured init: converts the parallel C arrays vars_vec / vars_values
 * (vars_vec_size entries each) into GenericVector<STRING> objects and calls
 * handle->Init() with them. The arrays are only read when both pointers are
 * non-null; otherwise two empty vectors are passed.
 * @return whatever handle->Init() returns.
 */
TESS_API int TESS_CALL TessBaseAPIInit4(
    TessBaseAPI* handle, const char* datapath, const char* language,
    TessOcrEngineMode mode, char** configs, int configs_size, char** vars_vec,
    char** vars_values, size_t vars_vec_size, BOOL set_only_non_debug_params) {
  GenericVector<STRING> varNames;
  GenericVector<STRING> varValues;
  const bool have_vars = (vars_vec != nullptr) && (vars_values != nullptr);
  for (size_t i = 0; have_vars && i < vars_vec_size; ++i) {
    varNames.push_back(STRING(vars_vec[i]));
    varValues.push_back(STRING(vars_values[i]));
  }
  const bool non_debug_only = (set_only_non_debug_params != 0);
  return handle->Init(datapath, language, mode, configs, configs_size,
                      &varNames, &varValues, non_debug_only);
}
/**
 * Init with config files but no variable overrides: the two variable vectors
 * are passed as nullptr and set_only_non_debug_params as false.
 * @return whatever handle->Init() returns.
 */
TESS_API int TESS_CALL TessBaseAPIInit1(TessBaseAPI* handle,
                                        const char* datapath,
                                        const char* language,
                                        TessOcrEngineMode oem, char** configs,
                                        int configs_size) {
  const int status = handle->Init(datapath, language, oem, configs,
                                  configs_size, nullptr, nullptr, false);
  return status;
}
/** Returns handle->Init(datapath, language, oem). */
TESS_API int TESS_CALL TessBaseAPIInit2(TessBaseAPI* handle,
                                        const char* datapath,
                                        const char* language,
                                        TessOcrEngineMode oem) {
  const int status = handle->Init(datapath, language, oem);
  return status;
}
/** Returns handle->Init(datapath, language). */
TESS_API int TESS_CALL TessBaseAPIInit3(TessBaseAPI* handle,
                                        const char* datapath,
                                        const char* language) {
  const int status = handle->Init(datapath, language);
  return status;
}
/** Returns handle->GetInitLanguagesAsString(). */
TESS_API const char* TESS_CALL
TessBaseAPIGetInitLanguagesAsString(const TessBaseAPI* handle) {
  const char* languages = handle->GetInitLanguagesAsString();
  return languages;
}
/**
 * Collects the languages from handle->GetLoadedLanguagesAsVector() into a
 * freshly allocated array of string copies. The array is created with new[]
 * and has one extra slot holding a terminating nullptr.
 */
TESS_API char** TESS_CALL
TessBaseAPIGetLoadedLanguagesAsVector(const TessBaseAPI* handle) {
  GenericVector<STRING> languages;
  handle->GetLoadedLanguagesAsVector(&languages);
  char** result = new char*[languages.size() + 1];
  int filled = 0;
  while (filled < languages.size()) {
    result[filled] = languages[filled].strdup();
    ++filled;
  }
  result[languages.size()] = nullptr;
  return result;
}
/**
 * Collects the languages from handle->GetAvailableLanguagesAsVector() into a
 * freshly allocated array of string copies. The array is created with new[]
 * and has one extra slot holding a terminating nullptr.
 */
TESS_API char** TESS_CALL
TessBaseAPIGetAvailableLanguagesAsVector(const TessBaseAPI* handle) {
  GenericVector<STRING> languages;
  handle->GetAvailableLanguagesAsVector(&languages);
  char** result = new char*[languages.size() + 1];
  int filled = 0;
  while (filled < languages.size()) {
    result[filled] = languages[filled].strdup();
    ++filled;
  }
  result[languages.size()] = nullptr;
  return result;
}
#ifndef DISABLED_LEGACY_ENGINE
/** Returns handle->InitLangMod(datapath, language). */
TESS_API int TESS_CALL TessBaseAPIInitLangMod(TessBaseAPI* handle,
                                              const char* datapath,
                                              const char* language) {
  const int status = handle->InitLangMod(datapath, language);
  return status;
}
#endif
/** Forwards to handle->InitForAnalysePage(). */
TESS_API void TESS_CALL TessBaseAPIInitForAnalysePage(TessBaseAPI* handle) {
  TessBaseAPI& api = *handle;
  api.InitForAnalysePage();
}
/** Forwards to handle->ReadConfigFile(filename). */
TESS_API void TESS_CALL TessBaseAPIReadConfigFile(TessBaseAPI* handle,
                                                  const char* filename) {
  TessBaseAPI& api = *handle;
  api.ReadConfigFile(filename);
}
/** Forwards to handle->ReadDebugConfigFile(filename). */
TESS_API void TESS_CALL TessBaseAPIReadDebugConfigFile(TessBaseAPI* handle,
                                                       const char* filename) {
  TessBaseAPI& api = *handle;
  api.ReadDebugConfigFile(filename);
}
/** Forwards to handle->SetPageSegMode(mode). */
TESS_API void TESS_CALL TessBaseAPISetPageSegMode(TessBaseAPI* handle,
                                                  TessPageSegMode mode) {
  TessBaseAPI& api = *handle;
  api.SetPageSegMode(mode);
}
/** Returns handle->GetPageSegMode(). */
TESS_API TessPageSegMode TESS_CALL
TessBaseAPIGetPageSegMode(const TessBaseAPI* handle) {
  const TessPageSegMode mode = handle->GetPageSegMode();
  return mode;
}
/**
 * Returns handle->TesseractRect(...) for the given raw image buffer and
 * rectangle; all arguments are passed through in the same order.
 */
TESS_API char* TESS_CALL TessBaseAPIRect(TessBaseAPI* handle,
                                         const unsigned char* imagedata,
                                         int bytes_per_pixel,
                                         int bytes_per_line, int left, int top,
                                         int width, int height) {
  char* text = handle->TesseractRect(imagedata, bytes_per_pixel,
                                     bytes_per_line, left, top, width, height);
  return text;
}
#ifndef DISABLED_LEGACY_ENGINE
/** Forwards to handle->ClearAdaptiveClassifier(). */
TESS_API void TESS_CALL
TessBaseAPIClearAdaptiveClassifier(TessBaseAPI* handle) {
  TessBaseAPI& api = *handle;
  api.ClearAdaptiveClassifier();
}
#endif
/**
 * Forwards a raw image buffer to handle->SetImage(); the arguments are passed
 * through in the same order.
 */
TESS_API void TESS_CALL TessBaseAPISetImage(TessBaseAPI* handle,
                                            const unsigned char* imagedata,
                                            int width, int height,
                                            int bytes_per_pixel,
                                            int bytes_per_line) {
  TessBaseAPI& api = *handle;
  api.SetImage(imagedata, width, height, bytes_per_pixel, bytes_per_line);
}
/** Forwards a Pix image to handle->SetImage(pix). */
TESS_API void TESS_CALL TessBaseAPISetImage2(TessBaseAPI* handle,
                                             struct Pix* pix) {
  TessBaseAPI& api = *handle;
  api.SetImage(pix);
}
/** Forwards to handle->SetSourceResolution(ppi). */
TESS_API void TESS_CALL TessBaseAPISetSourceResolution(TessBaseAPI* handle,
                                                       int ppi) {
  TessBaseAPI& api = *handle;
  api.SetSourceResolution(ppi);
}
/** Forwards to handle->SetRectangle(left, top, width, height). */
TESS_API void TESS_CALL TessBaseAPISetRectangle(TessBaseAPI* handle, int left,
                                                int top, int width,
                                                int height) {
  TessBaseAPI& api = *handle;
  api.SetRectangle(left, top, width, height);
}
/** Forwards to handle->SetThresholder(thresholder). */
TESS_API void TESS_CALL TessBaseAPISetThresholder(
    TessBaseAPI* handle, TessImageThresholder* thresholder) {
  TessBaseAPI& api = *handle;
  api.SetThresholder(thresholder);
}
/** Returns handle->GetThresholdedImage(). */
TESS_API struct Pix* TESS_CALL
TessBaseAPIGetThresholdedImage(TessBaseAPI* handle) {
  struct Pix* thresholded = handle->GetThresholdedImage();
  return thresholded;
}
/** Returns handle->GetRegions(pixa). */
TESS_API struct Boxa* TESS_CALL TessBaseAPIGetRegions(TessBaseAPI* handle,
                                                      struct Pixa** pixa) {
  struct Boxa* boxes = handle->GetRegions(pixa);
  return boxes;
}
/** Returns handle->GetTextlines(pixa, blockids). */
TESS_API struct Boxa* TESS_CALL TessBaseAPIGetTextlines(TessBaseAPI* handle,
                                                        struct Pixa** pixa,
                                                        int** blockids) {
  struct Boxa* boxes = handle->GetTextlines(pixa, blockids);
  return boxes;
}
/**
 * Returns the five-argument handle->GetTextlines(); raw_image is passed on
 * as a bool (any non-zero value means true).
 */
TESS_API struct Boxa* TESS_CALL TessBaseAPIGetTextlines1(
    TessBaseAPI* handle, const BOOL raw_image, const int raw_padding,
    struct Pixa** pixa, int** blockids, int** paraids) {
  const bool use_raw_image = (raw_image != 0);
  struct Boxa* boxes = handle->GetTextlines(use_raw_image, raw_padding, pixa,
                                            blockids, paraids);
  return boxes;
}
/** Returns handle->GetStrips(pixa, blockids). */
TESS_API struct Boxa* TESS_CALL TessBaseAPIGetStrips(TessBaseAPI* handle,
                                                     struct Pixa** pixa,
                                                     int** blockids) {
  struct Boxa* boxes = handle->GetStrips(pixa, blockids);
  return boxes;
}
/** Returns handle->GetWords(pixa). */
TESS_API struct Boxa* TESS_CALL TessBaseAPIGetWords(TessBaseAPI* handle,
                                                    struct Pixa** pixa) {
  struct Boxa* boxes = handle->GetWords(pixa);
  return boxes;
}
/** Returns handle->GetConnectedComponents(cc). */
TESS_API struct Boxa* TESS_CALL
TessBaseAPIGetConnectedComponents(TessBaseAPI* handle, struct Pixa** cc) {
  struct Boxa* boxes = handle->GetConnectedComponents(cc);
  return boxes;
}
/**
 * Returns the four-argument handle->GetComponentImages(); text_only is
 * passed on as a bool (any non-zero value means true).
 */
TESS_API struct Boxa* TESS_CALL TessBaseAPIGetComponentImages(
    TessBaseAPI* handle, TessPageIteratorLevel level, BOOL text_only,
    struct Pixa** pixa, int** blockids) {
  const bool only_text = (text_only != 0);
  struct Boxa* boxes =
      handle->GetComponentImages(level, only_text, pixa, blockids);
  return boxes;
}
/**
 * Returns the seven-argument handle->GetComponentImages(); text_only and
 * raw_image are passed on as bools (any non-zero value means true).
 */
TESS_API struct Boxa* TESS_CALL TessBaseAPIGetComponentImages1(
    TessBaseAPI* handle, const TessPageIteratorLevel level,
    const BOOL text_only, const BOOL raw_image, const int raw_padding,
    struct Pixa** pixa, int** blockids, int** paraids) {
  const bool only_text = (text_only != 0);
  const bool use_raw_image = (raw_image != 0);
  struct Boxa* boxes = handle->GetComponentImages(
      level, only_text, use_raw_image, raw_padding, pixa, blockids, paraids);
  return boxes;
}
/** Returns handle->GetThresholdedImageScaleFactor(). */
TESS_API int TESS_CALL
TessBaseAPIGetThresholdedImageScaleFactor(const TessBaseAPI* handle) {
  const int scale_factor = handle->GetThresholdedImageScaleFactor();
  return scale_factor;
}
/** Returns handle->AnalyseLayout(). */
TESS_API TessPageIterator* TESS_CALL
TessBaseAPIAnalyseLayout(TessBaseAPI* handle) {
  TessPageIterator* layout = handle->AnalyseLayout();
  return layout;
}
/** Returns handle->Recognize(monitor). */
TESS_API int TESS_CALL TessBaseAPIRecognize(TessBaseAPI* handle,
                                            ETEXT_DESC* monitor) {
  const int status = handle->Recognize(monitor);
  return status;
}
#ifndef DISABLED_LEGACY_ENGINE
/** Returns handle->RecognizeForChopTest(monitor). */
TESS_API int TESS_CALL TessBaseAPIRecognizeForChopTest(TessBaseAPI* handle,
                                                       ETEXT_DESC* monitor) {
  const int status = handle->RecognizeForChopTest(monitor);
  return status;
}
#endif
/**
 * Calls handle->ProcessPages() with the arguments in the same order and
 * returns its result as a BOOL.
 */
TESS_API BOOL TESS_CALL TessBaseAPIProcessPages(TessBaseAPI* handle,
                                                const char* filename,
                                                const char* retry_config,
                                                int timeout_millisec,
                                                TessResultRenderer* renderer) {
  const BOOL processed = static_cast<int>(
      handle->ProcessPages(filename, retry_config, timeout_millisec, renderer));
  return processed;
}
/**
 * Calls handle->ProcessPage() with the arguments in the same order and
 * returns its result as a BOOL.
 */
TESS_API BOOL TESS_CALL TessBaseAPIProcessPage(TessBaseAPI* handle,
                                               struct Pix* pix, int page_index,
                                               const char* filename,
                                               const char* retry_config,
                                               int timeout_millisec,
                                               TessResultRenderer* renderer) {
  const BOOL processed = static_cast<int>(handle->ProcessPage(
      pix, page_index, filename, retry_config, timeout_millisec, renderer));
  return processed;
}
/** Returns handle->GetIterator(). */
TESS_API TessResultIterator* TESS_CALL
TessBaseAPIGetIterator(TessBaseAPI* handle) {
  TessResultIterator* iterator = handle->GetIterator();
  return iterator;
}
/** Returns handle->GetMutableIterator(). */
TESS_API TessMutableIterator* TESS_CALL
TessBaseAPIGetMutableIterator(TessBaseAPI* handle) {
  TessMutableIterator* iterator = handle->GetMutableIterator();
  return iterator;
}
/** Returns handle->GetUTF8Text(). */
TESS_API char* TESS_CALL TessBaseAPIGetUTF8Text(TessBaseAPI* handle) {
  char* text = handle->GetUTF8Text();
  return text;
}
/**
 * Returns handle->GetHOCRText(nullptr, page_number); the first argument of
 * the underlying call is always passed as nullptr.
 */
TESS_API char* TESS_CALL TessBaseAPIGetHOCRText(TessBaseAPI* handle,
                                                int page_number) {
  char* text = handle->GetHOCRText(nullptr, page_number);
  return text;
}
/** Returns handle->GetAltoText(page_number). */
TESS_API char* TESS_CALL TessBaseAPIGetAltoText(TessBaseAPI* handle,
                                                int page_number) {
  char* text = handle->GetAltoText(page_number);
  return text;
}
/** Returns handle->GetTSVText(page_number). */
TESS_API char* TESS_CALL TessBaseAPIGetTsvText(TessBaseAPI* handle,
                                               int page_number) {
  char* text = handle->GetTSVText(page_number);
  return text;
}
/** Returns handle->GetBoxText(page_number). */
TESS_API char* TESS_CALL TessBaseAPIGetBoxText(TessBaseAPI* handle,
                                               int page_number) {
  char* text = handle->GetBoxText(page_number);
  return text;
}
/** Returns handle->GetWordStrBoxText(page_number). */
TESS_API char* TESS_CALL TessBaseAPIGetWordStrBoxText(TessBaseAPI* handle,
                                                      int page_number) {
  char* text = handle->GetWordStrBoxText(page_number);
  return text;
}
/** Returns handle->GetLSTMBoxText(page_number). */
TESS_API char* TESS_CALL TessBaseAPIGetLSTMBoxText(TessBaseAPI* handle,
                                                   int page_number) {
  char* text = handle->GetLSTMBoxText(page_number);
  return text;
}
/** Returns handle->GetUNLVText(). */
TESS_API char* TESS_CALL TessBaseAPIGetUNLVText(TessBaseAPI* handle) {
  char* text = handle->GetUNLVText();
  return text;
}
/** Returns handle->MeanTextConf(). */
TESS_API int TESS_CALL TessBaseAPIMeanTextConf(TessBaseAPI* handle) {
  const int mean_confidence = handle->MeanTextConf();
  return mean_confidence;
}
/** Returns handle->AllWordConfidences(). */
TESS_API int* TESS_CALL TessBaseAPIAllWordConfidences(TessBaseAPI* handle) {
  int* confidences = handle->AllWordConfidences();
  return confidences;
}
#ifndef DISABLED_LEGACY_ENGINE
/**
 * Calls handle->AdaptToWordStr(mode, wordstr) and returns its result as a
 * BOOL.
 */
TESS_API BOOL TESS_CALL TessBaseAPIAdaptToWordStr(TessBaseAPI* handle,
                                                  TessPageSegMode mode,
                                                  const char* wordstr) {
  const BOOL adapted = static_cast<int>(handle->AdaptToWordStr(mode, wordstr));
  return adapted;
}
#endif
/** Forwards to handle->Clear(). */
TESS_API void TESS_CALL TessBaseAPIClear(TessBaseAPI* handle) {
  TessBaseAPI& api = *handle;
  api.Clear();
}
/** Forwards to handle->End(). */
TESS_API void TESS_CALL TessBaseAPIEnd(TessBaseAPI* handle) {
  TessBaseAPI& api = *handle;
  api.End();
}
/** Returns handle->IsValidWord(word) unchanged. */
TESS_API int TESS_CALL TessBaseAPIIsValidWord(TessBaseAPI* handle,
                                              const char* word) {
  const int validity = handle->IsValidWord(word);
  return validity;
}
/**
 * Calls handle->GetTextDirection(out_offset, out_slope) and returns its
 * result as a BOOL. Both output pointers are handed to the API untouched.
 */
TESS_API BOOL TESS_CALL TessBaseAPIGetTextDirection(TessBaseAPI* handle,
                                                    int* out_offset,
                                                    float* out_slope) {
  const BOOL found =
      static_cast<int>(handle->GetTextDirection(out_offset, out_slope));
  return found;
}
/** Forwards to handle->SetDictFunc(f). */
TESS_API void TESS_CALL TessBaseAPISetDictFunc(TessBaseAPI* handle,
                                               TessDictFunc f) {
  TessBaseAPI& api = *handle;
  api.SetDictFunc(f);
}
/**
 * Calls the static TessBaseAPI::ClearPersistentCache(); the handle argument
 * is unused.
 */
TESS_API void TESS_CALL
TessBaseAPIClearPersistentCache(TessBaseAPI* /*handle*/) {
  TessBaseAPI::ClearPersistentCache();
}
/** Forwards to handle->SetProbabilityInContextFunc(f). */
TESS_API void TESS_CALL TessBaseAPISetProbabilityInContextFunc(
    TessBaseAPI* handle, TessProbabilityInContextFunc f) {
  TessBaseAPI& api = *handle;
  api.SetProbabilityInContextFunc(f);
}
#ifndef DISABLED_LEGACY_ENGINE
/**
 * Calls handle->DetectOrientationScript() with the four output pointers
 * unchanged and returns its success flag as a BOOL.
 */
TESS_API BOOL TESS_CALL TessBaseAPIDetectOrientationScript(
    TessBaseAPI* handle, int* orient_deg, float* orient_conf,
    const char** script_name, float* script_conf) {
  const bool detected = handle->DetectOrientationScript(
      orient_deg, orient_conf, script_name, script_conf);
  return static_cast<BOOL>(detected);
}
/** Forwards to handle->GetFeaturesForBlob() with the arguments unchanged. */
TESS_API void TESS_CALL TessBaseAPIGetFeaturesForBlob(
    TessBaseAPI* handle, TBLOB* blob, INT_FEATURE_STRUCT* int_features,
    int* num_features, int* FeatureOutlineIndex) {
  TessBaseAPI& api = *handle;
  api.GetFeaturesForBlob(blob, int_features, num_features,
                         FeatureOutlineIndex);
}
/**
 * Returns the static TessBaseAPI::FindRowForBox() result for the given box
 * coordinates.
 */
TESS_API ROW* TESS_CALL TessFindRowForBox(BLOCK_LIST* blocks, int left, int top,
                                          int right, int bottom) {
  ROW* row = TessBaseAPI::FindRowForBox(blocks, left, top, right, bottom);
  return row;
}
/**
 * Forwards to handle->RunAdaptiveClassifier() with the arguments unchanged.
 */
TESS_API void TESS_CALL TessBaseAPIRunAdaptiveClassifier(
    TessBaseAPI* handle, TBLOB* blob, int num_max_matches, int* unichar_ids,
    float* ratings, int* num_matches_returned) {
  TessBaseAPI& api = *handle;
  api.RunAdaptiveClassifier(blob, num_max_matches, unichar_ids, ratings,
                            num_matches_returned);
}
#endif // ndef DISABLED_LEGACY_ENGINE
/** Returns handle->GetUnichar(unichar_id). */
TESS_API const char* TESS_CALL TessBaseAPIGetUnichar(TessBaseAPI* handle,
                                                     int unichar_id) {
  const char* unichar = handle->GetUnichar(unichar_id);
  return unichar;
}
/** Returns handle->GetDawg(i). */
TESS_API const TessDawg* TESS_CALL TessBaseAPIGetDawg(const TessBaseAPI* handle,
                                                      int i) {
  const TessDawg* dawg = handle->GetDawg(i);
  return dawg;
}
/** Returns handle->NumDawgs(). */
TESS_API int TESS_CALL TessBaseAPINumDawgs(const TessBaseAPI* handle) {
  const int dawg_count = handle->NumDawgs();
  return dawg_count;
}
#ifndef DISABLED_LEGACY_ENGINE
/**
 * Returns the static TessBaseAPI::MakeTessOCRRow() result for the given line
 * metrics.
 */
TESS_API ROW* TESS_CALL TessMakeTessOCRRow(float baseline, float xheight,
                                           float descender, float ascender) {
  ROW* row =
      TessBaseAPI::MakeTessOCRRow(baseline, xheight, descender, ascender);
  return row;
}
/** Returns the static TessBaseAPI::MakeTBLOB(pix) result. */
TESS_API TBLOB* TESS_CALL TessMakeTBLOB(struct Pix* pix) {
  TBLOB* blob = TessBaseAPI::MakeTBLOB(pix);
  return blob;
}
/**
 * Calls the static TessBaseAPI::NormalizeTBLOB(); numeric_mode is passed on
 * as a bool (any non-zero value means true).
 */
TESS_API void TESS_CALL TessNormalizeTBLOB(TBLOB* tblob, ROW* row,
                                           BOOL numeric_mode) {
  const bool is_numeric = (numeric_mode != 0);
  TessBaseAPI::NormalizeTBLOB(tblob, row, is_numeric);
}
#endif // ndef DISABLED_LEGACY_ENGINE
/** Returns handle->oem(). */
TESS_API TessOcrEngineMode TESS_CALL TessBaseAPIOem(const TessBaseAPI* handle) {
  const TessOcrEngineMode engine_mode = handle->oem();
  return engine_mode;
}
/** Forwards to handle->InitTruthCallback(cb). */
TESS_API void TESS_CALL TessBaseAPIInitTruthCallback(TessBaseAPI* handle,
                                                     TessTruthCallback* cb) {
  TessBaseAPI& api = *handle;
  api.InitTruthCallback(cb);
}
/** Forwards to handle->set_min_orientation_margin(margin). */
TESS_API void TESS_CALL TessBaseAPISetMinOrientationMargin(TessBaseAPI* handle,
                                                           double margin) {
  TessBaseAPI& api = *handle;
  api.set_min_orientation_margin(margin);
}
/**
 * Forwards to handle->GetBlockTextOrientations(); both output pointers are
 * handed to the API untouched.
 */
TESS_API void TESS_CALL TessBaseGetBlockTextOrientations(
    TessBaseAPI* handle, int** block_orientation, bool** vertical_writing) {
  TessBaseAPI& api = *handle;
  api.GetBlockTextOrientations(block_orientation, vertical_writing);
}
#ifndef DISABLED_LEGACY_ENGINE
/** Returns handle->FindLinesCreateBlockList(). */
TESS_API BLOCK_LIST* TESS_CALL
TessBaseAPIFindLinesCreateBlockList(TessBaseAPI* handle) {
  BLOCK_LIST* block_list = handle->FindLinesCreateBlockList();
  return block_list;
}
#endif
/** Destroys a page iterator with delete; a null pointer is a no-op. */
TESS_API void TESS_CALL
TessPageIteratorDelete(TessPageIterator* handle) {
  delete handle;
}
/** Copy-constructs a new TessPageIterator from *handle with new. */
TESS_API TessPageIterator* TESS_CALL
TessPageIteratorCopy(const TessPageIterator* handle) {
  TessPageIterator* duplicate = new TessPageIterator(*handle);
  return duplicate;
}
/** Forwards to handle->Begin(). */
TESS_API void TESS_CALL TessPageIteratorBegin(TessPageIterator* handle) {
  TessPageIterator& iterator = *handle;
  iterator.Begin();
}
/** Calls handle->Next(level) and returns its result as a BOOL. */
TESS_API BOOL TESS_CALL TessPageIteratorNext(TessPageIterator* handle,
                                             TessPageIteratorLevel level) {
  const BOOL advanced = static_cast<int>(handle->Next(level));
  return advanced;
}
/** Calls handle->IsAtBeginningOf(level) and returns its result as a BOOL. */
TESS_API BOOL TESS_CALL TessPageIteratorIsAtBeginningOf(
    const TessPageIterator* handle, TessPageIteratorLevel level) {
  const BOOL at_beginning = static_cast<int>(handle->IsAtBeginningOf(level));
  return at_beginning;
}
/**
 * Calls handle->IsAtFinalElement(level, element) and returns its result as
 * a BOOL.
 */
TESS_API BOOL TESS_CALL TessPageIteratorIsAtFinalElement(
    const TessPageIterator* handle, TessPageIteratorLevel level,
    TessPageIteratorLevel element) {
  const BOOL at_final =
      static_cast<int>(handle->IsAtFinalElement(level, element));
  return at_final;
}
/**
 * Calls handle->BoundingBox() with the four output pointers unchanged and
 * returns its result as a BOOL.
 */
TESS_API BOOL TESS_CALL TessPageIteratorBoundingBox(
    const TessPageIterator* handle, TessPageIteratorLevel level, int* left,
    int* top, int* right, int* bottom) {
  const BOOL has_box =
      static_cast<int>(handle->BoundingBox(level, left, top, right, bottom));
  return has_box;
}
/** Returns handle->BlockType(). */
TESS_API TessPolyBlockType TESS_CALL
TessPageIteratorBlockType(const TessPageIterator* handle) {
  const TessPolyBlockType block_type = handle->BlockType();
  return block_type;
}
/** Returns handle->GetBinaryImage(level). */
TESS_API struct Pix* TESS_CALL TessPageIteratorGetBinaryImage(
    const TessPageIterator* handle, TessPageIteratorLevel level) {
  struct Pix* binary_image = handle->GetBinaryImage(level);
  return binary_image;
}
/**
 * Returns handle->GetImage(level, padding, original_image, left, top); all
 * arguments are passed through unchanged.
 */
TESS_API struct Pix* TESS_CALL TessPageIteratorGetImage(
    const TessPageIterator* handle, TessPageIteratorLevel level, int padding,
    struct Pix* original_image, int* left, int* top) {
  struct Pix* image =
      handle->GetImage(level, padding, original_image, left, top);
  return image;
}
/**
 * Calls handle->Baseline() with the four output pointers unchanged and
 * returns its result as a BOOL.
 */
TESS_API BOOL TESS_CALL TessPageIteratorBaseline(const TessPageIterator* handle,
                                                 TessPageIteratorLevel level,
                                                 int* x1, int* y1, int* x2,
                                                 int* y2) {
  const BOOL has_baseline =
      static_cast<int>(handle->Baseline(level, x1, y1, x2, y2));
  return has_baseline;
}
/**
 * Forwards to handle->Orientation(); the four output pointers are handed to
 * the iterator untouched.
 */
TESS_API void TESS_CALL TessPageIteratorOrientation(
    TessPageIterator* handle, TessOrientation* orientation,
    TessWritingDirection* writing_direction, TessTextlineOrder* textline_order,
    float* deskew_angle) {
  TessPageIterator& iterator = *handle;
  iterator.Orientation(orientation, writing_direction, textline_order,
                       deskew_angle);
}
/**
 * C wrapper around handle->ParagraphInfo().
 *
 * justification and first_line_indent are forwarded to the iterator
 * unchanged. is_list_item and is_crown are optional: pass nullptr to skip
 * either of them.
 *
 * The bool temporaries start out as false so that the BOOL outputs are always
 * well defined, even if ParagraphInfo() returns without writing to them;
 * previously an indeterminate value could be read and copied to the caller.
 */
TESS_API void TESS_CALL TessPageIteratorParagraphInfo(
    TessPageIterator* handle, TessParagraphJustification* justification,
    BOOL* is_list_item, BOOL* is_crown, int* first_line_indent) {
  bool bool_is_list_item = false;
  bool bool_is_crown = false;
  handle->ParagraphInfo(justification, &bool_is_list_item, &bool_is_crown,
                        first_line_indent);
  if (is_list_item != nullptr) {
    *is_list_item = static_cast<int>(bool_is_list_item);
  }
  if (is_crown != nullptr) {
    *is_crown = static_cast<int>(bool_is_crown);
  }
}
/** Destroys a result iterator with delete; a null pointer is a no-op. */
TESS_API void TESS_CALL
TessResultIteratorDelete(TessResultIterator* handle) {
  delete handle;
}
/** Copy-constructs a new TessResultIterator from *handle with new. */
TESS_API TessResultIterator* TESS_CALL
TessResultIteratorCopy(const TessResultIterator* handle) {
  TessResultIterator* duplicate = new TessResultIterator(*handle);
  return duplicate;
}
/**
 * Returns the same object viewed as a TessPageIterator (implicit pointer
 * conversion); no new object is created.
 */
TESS_API TessPageIterator* TESS_CALL
TessResultIteratorGetPageIterator(TessResultIterator* handle) {
  TessPageIterator* as_page_iterator = handle;
  return as_page_iterator;
}
/**
 * Const flavour: returns the same object viewed as a const TessPageIterator
 * (implicit pointer conversion); no new object is created.
 */
TESS_API const TessPageIterator* TESS_CALL
TessResultIteratorGetPageIteratorConst(const TessResultIterator* handle) {
  const TessPageIterator* as_page_iterator = handle;
  return as_page_iterator;
}
/** Allocates a TessChoiceIterator constructed from *handle with new. */
TESS_API TessChoiceIterator* TESS_CALL
TessResultIteratorGetChoiceIterator(const TessResultIterator* handle) {
  TessChoiceIterator* choices = new TessChoiceIterator(*handle);
  return choices;
}
/** Calls handle->Next(level) and returns its result as a BOOL. */
TESS_API BOOL TESS_CALL TessResultIteratorNext(TessResultIterator* handle,
                                               TessPageIteratorLevel level) {
  const BOOL advanced = static_cast<int>(handle->Next(level));
  return advanced;
}
/** Returns handle->GetUTF8Text(level). */
TESS_API char* TESS_CALL TessResultIteratorGetUTF8Text(
    const TessResultIterator* handle, TessPageIteratorLevel level) {
  char* text = handle->GetUTF8Text(level);
  return text;
}
/** Returns handle->Confidence(level). */
TESS_API float TESS_CALL TessResultIteratorConfidence(
    const TessResultIterator* handle, TessPageIteratorLevel level) {
  const float confidence = handle->Confidence(level);
  return confidence;
}
/** Returns handle->WordRecognitionLanguage(). */
TESS_API const char* TESS_CALL
TessResultIteratorWordRecognitionLanguage(const TessResultIterator* handle) {
  const char* language = handle->WordRecognitionLanguage();
  return language;
}
/**
 * C wrapper around handle->WordFontAttributes().
 *
 * The six BOOL outputs are optional: pass nullptr for any flag that is not
 * needed. pointsize and font_id are forwarded to the iterator unchanged.
 *
 * The bool temporaries start out as false so that every BOOL output is well
 * defined even if WordFontAttributes() returns without writing to the flags;
 * previously an indeterminate value could be read and copied to the caller.
 *
 * @return the pointer returned by WordFontAttributes().
 */
TESS_API const char* TESS_CALL TessResultIteratorWordFontAttributes(
    const TessResultIterator* handle, BOOL* is_bold, BOOL* is_italic,
    BOOL* is_underlined, BOOL* is_monospace, BOOL* is_serif, BOOL* is_smallcaps,
    int* pointsize, int* font_id) {
  bool bool_is_bold = false;
  bool bool_is_italic = false;
  bool bool_is_underlined = false;
  bool bool_is_monospace = false;
  bool bool_is_serif = false;
  bool bool_is_smallcaps = false;
  const char* ret = handle->WordFontAttributes(
      &bool_is_bold, &bool_is_italic, &bool_is_underlined, &bool_is_monospace,
      &bool_is_serif, &bool_is_smallcaps, pointsize, font_id);
  if (is_bold != nullptr) {
    *is_bold = static_cast<int>(bool_is_bold);
  }
  if (is_italic != nullptr) {
    *is_italic = static_cast<int>(bool_is_italic);
  }
  if (is_underlined != nullptr) {
    *is_underlined = static_cast<int>(bool_is_underlined);
  }
  if (is_monospace != nullptr) {
    *is_monospace = static_cast<int>(bool_is_monospace);
  }
  if (is_serif != nullptr) {
    *is_serif = static_cast<int>(bool_is_serif);
  }
  if (is_smallcaps != nullptr) {
    *is_smallcaps = static_cast<int>(bool_is_smallcaps);
  }
  return ret;
}
/** Calls handle->WordIsFromDictionary() and returns its result as a BOOL. */
TESS_API BOOL TESS_CALL
TessResultIteratorWordIsFromDictionary(const TessResultIterator* handle) {
  const BOOL from_dictionary = static_cast<int>(handle->WordIsFromDictionary());
  return from_dictionary;
}
/** Calls handle->WordIsNumeric() and returns its result as a BOOL. */
TESS_API BOOL TESS_CALL
TessResultIteratorWordIsNumeric(const TessResultIterator* handle) {
  const BOOL is_numeric = static_cast<int>(handle->WordIsNumeric());
  return is_numeric;
}
/** Calls handle->SymbolIsSuperscript() and returns its result as a BOOL. */
TESS_API BOOL TESS_CALL
TessResultIteratorSymbolIsSuperscript(const TessResultIterator* handle) {
  const BOOL is_superscript = static_cast<int>(handle->SymbolIsSuperscript());
  return is_superscript;
}
/** Calls handle->SymbolIsSubscript() and returns its result as a BOOL. */
TESS_API BOOL TESS_CALL
TessResultIteratorSymbolIsSubscript(const TessResultIterator* handle) {
  const BOOL is_subscript = static_cast<int>(handle->SymbolIsSubscript());
  return is_subscript;
}
/** Calls handle->SymbolIsDropcap() and returns its result as a BOOL. */
TESS_API BOOL TESS_CALL
TessResultIteratorSymbolIsDropcap(const TessResultIterator* handle) {
  const BOOL is_dropcap = static_cast<int>(handle->SymbolIsDropcap());
  return is_dropcap;
}
/** Destroys a choice iterator with delete; a null pointer is a no-op. */
TESS_API void TESS_CALL
TessChoiceIteratorDelete(TessChoiceIterator* handle) {
  delete handle;
}
/** Calls handle->Next() and returns its result as a BOOL. */
TESS_API BOOL TESS_CALL TessChoiceIteratorNext(TessChoiceIterator* handle) {
  const BOOL advanced = static_cast<int>(handle->Next());
  return advanced;
}
/** Returns handle->GetUTF8Text(). */
TESS_API const char* TESS_CALL
TessChoiceIteratorGetUTF8Text(const TessChoiceIterator* handle) {
  const char* text = handle->GetUTF8Text();
  return text;
}
/** Returns handle->Confidence(). */
TESS_API float TESS_CALL
TessChoiceIteratorConfidence(const TessChoiceIterator* handle) {
  const float confidence = handle->Confidence();
  return confidence;
}
/** Allocates a value-initialised ETEXT_DESC with new. */
TESS_API ETEXT_DESC* TESS_CALL TessMonitorCreate() {
  ETEXT_DESC* monitor = new ETEXT_DESC();
  return monitor;
}
/** Destroys a monitor with delete; a null pointer is a no-op. */
TESS_API void TESS_CALL
TessMonitorDelete(ETEXT_DESC* monitor) {
  delete monitor;
}
/** Stores cancelFunc in the monitor's cancel member. */
TESS_API void TESS_CALL TessMonitorSetCancelFunc(ETEXT_DESC* monitor,
                                                 TessCancelFunc cancelFunc) {
  ETEXT_DESC& target = *monitor;
  target.cancel = cancelFunc;
}
/** Stores cancelThis in the monitor's cancel_this member. */
TESS_API void TESS_CALL TessMonitorSetCancelThis(ETEXT_DESC* monitor,
                                                 void* cancelThis) {
  ETEXT_DESC& target = *monitor;
  target.cancel_this = cancelThis;
}
/** Returns the monitor's cancel_this member. */
TESS_API void* TESS_CALL TessMonitorGetCancelThis(ETEXT_DESC* monitor) {
  void* cancel_this = monitor->cancel_this;
  return cancel_this;
}
/** Stores progressFunc in the monitor's progress_callback2 member. */
TESS_API void TESS_CALL
TessMonitorSetProgressFunc(ETEXT_DESC* monitor, TessProgressFunc progressFunc) {
  ETEXT_DESC& target = *monitor;
  target.progress_callback2 = progressFunc;
}
/** Returns the monitor's progress member converted to int. */
TESS_API int TESS_CALL TessMonitorGetProgress(ETEXT_DESC* monitor) {
  const int progress = monitor->progress;
  return progress;
}
/** Forwards to monitor->set_deadline_msecs(deadline). */
TESS_API void TESS_CALL TessMonitorSetDeadlineMSecs(ETEXT_DESC* monitor,
                                                    int deadline) {
  ETEXT_DESC& target = *monitor;
  target.set_deadline_msecs(deadline);
}

630
third_party/ocr/tesseract-ocr/src/capi.h vendored Normal file
View File

@ -0,0 +1,630 @@
///////////////////////////////////////////////////////////////////////
// File: capi.h
// Description: C-API TessBaseAPI
//
// (C) Copyright 2012, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#ifndef API_CAPI_H_
#define API_CAPI_H_
/* If baseapi.h has already been included (its guard macro is defined) and the
 * user did not request it explicitly, turn TESS_CAPI_INCLUDE_BASEAPI on so the
 * typedefs below map onto the real C++ classes. */
#if defined(TESSERACT_API_BASEAPI_H_) && !defined(TESS_CAPI_INCLUDE_BASEAPI)
# define TESS_CAPI_INCLUDE_BASEAPI
#endif
#ifdef TESS_CAPI_INCLUDE_BASEAPI
# include "baseapi.h"
# include "ocrclass.h"
# include "pageiterator.h"
# include "renderer.h"
# include "resultiterator.h"
#else
# include <stdbool.h>
# include <stdio.h>
# include "platform.h"
#endif
#ifdef __cplusplus
extern "C" {
#endif
/* Calling-convention decoration used on the exported functions: __cdecl when
 * WIN32 is defined, empty otherwise. A definition supplied before this header
 * is included takes precedence. */
#ifndef TESS_CALL
# if defined(WIN32)
# define TESS_CALL __cdecl
# else
# define TESS_CALL
# endif
#endif
/* Boolean type of the C interface: an int with TRUE == 1 and FALSE == 0.
 * Skipped entirely when a BOOL macro already exists.
 * NOTE(review): this only detects a BOOL *macro*; a BOOL typedef supplied by
 * another header would not be seen by the preprocessor -- confirm on the
 * target platforms. */
#ifndef BOOL
# define BOOL int
# define TRUE 1
# define FALSE 0
#endif
#ifdef TESS_CAPI_INCLUDE_BASEAPI
/* C++ build (TESS_CAPI_INCLUDE_BASEAPI defined): every Tess* name used by the
 * C interface is an alias of the corresponding tesseract type. */
typedef tesseract::TessResultRenderer TessResultRenderer;
typedef tesseract::TessTextRenderer TessTextRenderer;
typedef tesseract::TessHOcrRenderer TessHOcrRenderer;
typedef tesseract::TessAltoRenderer TessAltoRenderer;
typedef tesseract::TessTsvRenderer TessTsvRenderer;
typedef tesseract::TessPDFRenderer TessPDFRenderer;
typedef tesseract::TessUnlvRenderer TessUnlvRenderer;
typedef tesseract::TessBoxTextRenderer TessBoxTextRenderer;
typedef tesseract::TessWordStrBoxRenderer TessWordStrBoxRenderer;
typedef tesseract::TessLSTMBoxRenderer TessLSTMBoxRenderer;
typedef tesseract::TessBaseAPI TessBaseAPI;
typedef tesseract::PageIterator TessPageIterator;
typedef tesseract::ResultIterator TessResultIterator;
typedef tesseract::MutableIterator TessMutableIterator;
typedef tesseract::ChoiceIterator TessChoiceIterator;
typedef tesseract::OcrEngineMode TessOcrEngineMode;
typedef tesseract::PageSegMode TessPageSegMode;
typedef tesseract::ImageThresholder TessImageThresholder;
typedef tesseract::PageIteratorLevel TessPageIteratorLevel;
typedef tesseract::DictFunc TessDictFunc;
typedef tesseract::ProbabilityInContextFunc TessProbabilityInContextFunc;
// typedef tesseract::ParamsModelClassifyFunc TessParamsModelClassifyFunc;
typedef tesseract::FillLatticeFunc TessFillLatticeFunc;
typedef tesseract::Dawg TessDawg;
typedef tesseract::TruthCallback TessTruthCallback;
typedef tesseract::Orientation TessOrientation;
typedef tesseract::ParagraphJustification TessParagraphJustification;
typedef tesseract::WritingDirection TessWritingDirection;
typedef tesseract::TextlineOrder TessTextlineOrder;
/* PolyBlockType is referenced without the tesseract:: qualifier. */
typedef PolyBlockType TessPolyBlockType;
#else
/* Opaque handle types for C callers: only pointers to these structs are ever
 * passed across the API, so no definition is needed here.
 * The renderer list matches the aliases declared in the C++ branch of this
 * header (Alto, Tsv, WordStrBox and LSTMBox were previously missing on the C
 * side even though their *RendererCreate functions are declared below). */
typedef struct TessResultRenderer TessResultRenderer;
typedef struct TessTextRenderer TessTextRenderer;
typedef struct TessHOcrRenderer TessHOcrRenderer;
typedef struct TessAltoRenderer TessAltoRenderer;
typedef struct TessTsvRenderer TessTsvRenderer;
typedef struct TessPDFRenderer TessPDFRenderer;
typedef struct TessUnlvRenderer TessUnlvRenderer;
typedef struct TessBoxTextRenderer TessBoxTextRenderer;
typedef struct TessWordStrBoxRenderer TessWordStrBoxRenderer;
typedef struct TessLSTMBoxRenderer TessLSTMBoxRenderer;
typedef struct TessBaseAPI TessBaseAPI;
typedef struct TessPageIterator TessPageIterator;
typedef struct TessResultIterator TessResultIterator;
typedef struct TessMutableIterator TessMutableIterator;
typedef struct TessChoiceIterator TessChoiceIterator;
/* C-side counterpart of tesseract::OcrEngineMode (the C++ branch of this
 * header aliases the same name to that type). Values are written out
 * explicitly because both views must agree numerically. */
typedef enum TessOcrEngineMode {
  OEM_TESSERACT_ONLY = 0,
  OEM_LSTM_ONLY = 1,
  OEM_TESSERACT_LSTM_COMBINED = 2,
  OEM_DEFAULT = 3
} TessOcrEngineMode;
/* C-side counterpart of tesseract::PageSegMode (aliased under the same name
 * in the C++ branch). PSM_COUNT is the number of modes, not a mode itself;
 * it must stay last. */
typedef enum TessPageSegMode {
  PSM_OSD_ONLY = 0,
  PSM_AUTO_OSD = 1,
  PSM_AUTO_ONLY = 2,
  PSM_AUTO = 3,
  PSM_SINGLE_COLUMN = 4,
  PSM_SINGLE_BLOCK_VERT_TEXT = 5,
  PSM_SINGLE_BLOCK = 6,
  PSM_SINGLE_LINE = 7,
  PSM_SINGLE_WORD = 8,
  PSM_CIRCLE_WORD = 9,
  PSM_SINGLE_CHAR = 10,
  PSM_SPARSE_TEXT = 11,
  PSM_SPARSE_TEXT_OSD = 12,
  PSM_RAW_LINE = 13,
  PSM_COUNT = 14
} TessPageSegMode;
/* C-side counterpart of tesseract::PageIteratorLevel (aliased under the same
 * name in the C++ branch); passed as the `level` argument of the iterator
 * functions declared later in this header. */
typedef enum TessPageIteratorLevel {
  RIL_BLOCK = 0,
  RIL_PARA = 1,
  RIL_TEXTLINE = 2,
  RIL_WORD = 3,
  RIL_SYMBOL = 4
} TessPageIteratorLevel;
/* C-side counterpart of PolyBlockType (aliased under the same name in the
 * C++ branch); returned by TessPageIteratorBlockType. PT_COUNT is the number
 * of block types and must stay last. */
typedef enum TessPolyBlockType {
  PT_UNKNOWN = 0,
  PT_FLOWING_TEXT = 1,
  PT_HEADING_TEXT = 2,
  PT_PULLOUT_TEXT = 3,
  PT_EQUATION = 4,
  PT_INLINE_EQUATION = 5,
  PT_TABLE = 6,
  PT_VERTICAL_TEXT = 7,
  PT_CAPTION_TEXT = 8,
  PT_FLOWING_IMAGE = 9,
  PT_HEADING_IMAGE = 10,
  PT_PULLOUT_IMAGE = 11,
  PT_HORZ_LINE = 12,
  PT_VERT_LINE = 13,
  PT_NOISE = 14,
  PT_COUNT = 15
} TessPolyBlockType;
/* C-side counterpart of tesseract::Orientation (aliased under the same name
 * in the C++ branch). Written by TessPageIteratorOrientation; the project's
 * ImageTextDirectOCR also compares the int returned by MyOSD against these
 * enumerators, so the numeric values matter. */
typedef enum TessOrientation {
  ORIENTATION_PAGE_UP = 0,
  ORIENTATION_PAGE_RIGHT = 1,
  ORIENTATION_PAGE_DOWN = 2,
  ORIENTATION_PAGE_LEFT = 3
} TessOrientation;
/* C-side counterpart of tesseract::ParagraphJustification (aliased under the
 * same name in the C++ branch); written by TessPageIteratorParagraphInfo. */
typedef enum TessParagraphJustification {
  JUSTIFICATION_UNKNOWN = 0,
  JUSTIFICATION_LEFT = 1,
  JUSTIFICATION_CENTER = 2,
  JUSTIFICATION_RIGHT = 3
} TessParagraphJustification;
/* C-side counterpart of tesseract::WritingDirection (aliased under the same
 * name in the C++ branch); written by TessPageIteratorOrientation. */
typedef enum TessWritingDirection {
  WRITING_DIRECTION_LEFT_TO_RIGHT = 0,
  WRITING_DIRECTION_RIGHT_TO_LEFT = 1,
  WRITING_DIRECTION_TOP_TO_BOTTOM = 2
} TessWritingDirection;
/* C-side counterpart of tesseract::TextlineOrder (aliased under the same
 * name in the C++ branch); written by TessPageIteratorOrientation. */
typedef enum TessTextlineOrder {
  TEXTLINE_ORDER_LEFT_TO_RIGHT = 0,
  TEXTLINE_ORDER_RIGHT_TO_LEFT = 1,
  TEXTLINE_ORDER_TOP_TO_BOTTOM = 2
} TessTextlineOrder;
/* Opaque in C mode: the progress-monitor object handled through the
 * TessMonitor* functions and passed to TessBaseAPIRecognize. */
typedef struct ETEXT_DESC ETEXT_DESC;
#endif
/* Callback signatures installed on a monitor via TessMonitorSetCancelFunc and
 * TessMonitorSetProgressFunc. `cancel_this` is the pointer registered with
 * TessMonitorSetCancelThis.
 * NOTE(review): presumably a true result from the cancel callback requests
 * that recognition stop - confirm against the monitor implementation. */
typedef bool (*TessCancelFunc)(void* cancel_this, int words);
typedef bool (*TessProgressFunc)(ETEXT_DESC* ths, int left, int right, int top,
int bottom);
/* Forward declarations only: the API passes these by pointer, so their
 * definitions are not needed here.
 * NOTE(review): presumably the Leptonica image, box-array and pix-array
 * types - confirm. */
struct Pix;
struct Boxa;
struct Pixa;
/* Project-specific addition (not part of the Tess*-prefixed API). Takes the
 * engine handle and returns an int; the caller introduced with it,
 * HGOCRTesseract::ImageTextDirectOCR, compares the result directly against
 * the TessOrientation enumerators, so the value is presumably an orientation
 * code in that numbering - confirm against the definition.
 * NOTE(review): this is the only declaration in this header without
 * TESS_CALL; make sure the definition uses the matching calling convention. */
TESS_API int MyOSD(TessBaseAPI* api);
/* General free functions */
/* TessVersion returns a const string and takes no handle.
 * NOTE(review): the TessDelete* helpers presumably release buffers handed out
 * by other functions of this API (the note on
 * TessBaseAPIDetectOrientationScript names TessDeleteText for that purpose);
 * confirm which results pair with which helper in the implementation. */
TESS_API const char* TESS_CALL TessVersion();
TESS_API void TESS_CALL TessDeleteText(const char* text);
TESS_API void TESS_CALL TessDeleteTextArray(char** arr);
TESS_API void TESS_CALL TessDeleteIntArray(const int* arr);
/* Renderer API */
/* Every *RendererCreate function takes an output base name and returns the
 * generic TessResultRenderer handle, so callers never deal with the concrete
 * renderer type. Create2 for hOCR adds a font_info flag; the PDF variant
 * additionally takes a data directory and a textonly flag. */
TESS_API TessResultRenderer* TESS_CALL
TessTextRendererCreate(const char* outputbase);
TESS_API TessResultRenderer* TESS_CALL
TessHOcrRendererCreate(const char* outputbase);
TESS_API TessResultRenderer* TESS_CALL
TessHOcrRendererCreate2(const char* outputbase, BOOL font_info);
TESS_API TessResultRenderer* TESS_CALL
TessAltoRendererCreate(const char* outputbase);
TESS_API TessResultRenderer* TESS_CALL
TessTsvRendererCreate(const char* outputbase);
TESS_API TessResultRenderer* TESS_CALL TessPDFRendererCreate(
const char* outputbase, const char* datadir, BOOL textonly);
TESS_API TessResultRenderer* TESS_CALL
TessUnlvRendererCreate(const char* outputbase);
TESS_API TessResultRenderer* TESS_CALL
TessBoxTextRendererCreate(const char* outputbase);
TESS_API TessResultRenderer* TESS_CALL
TessLSTMBoxRendererCreate(const char* outputbase);
TESS_API TessResultRenderer* TESS_CALL
TessWordStrBoxRendererCreate(const char* outputbase);
/* Releases a renderer obtained from one of the Create functions above. */
TESS_API void TESS_CALL TessDeleteResultRenderer(TessResultRenderer* renderer);
/* Insert/Next expose a chain of renderers: Insert attaches `next` to
 * `renderer`, Next returns the renderer that follows. */
TESS_API void TESS_CALL TessResultRendererInsert(TessResultRenderer* renderer,
TessResultRenderer* next);
TESS_API TessResultRenderer* TESS_CALL
TessResultRendererNext(TessResultRenderer* renderer);
/* Document life cycle: BeginDocument, then AddImage with an engine handle,
 * then EndDocument.
 * NOTE(review): the BOOL results presumably report success - confirm. */
TESS_API BOOL TESS_CALL TessResultRendererBeginDocument(
TessResultRenderer* renderer, const char* title);
TESS_API BOOL TESS_CALL TessResultRendererAddImage(TessResultRenderer* renderer,
TessBaseAPI* api);
TESS_API BOOL TESS_CALL
TessResultRendererEndDocument(TessResultRenderer* renderer);
/* "Extention" is misspelled but is the exported symbol name; renaming it
 * would break existing callers. */
TESS_API const char* TESS_CALL
TessResultRendererExtention(TessResultRenderer* renderer);
TESS_API const char* TESS_CALL
TessResultRendererTitle(TessResultRenderer* renderer);
TESS_API int TESS_CALL TessResultRendererImageNum(TessResultRenderer* renderer);
/* Base API */
/* Handle lifetime: TessBaseAPICreate returns the handle that every other
 * TessBaseAPI* function takes as its first argument; TessBaseAPIDelete
 * receives it back. */
TESS_API TessBaseAPI* TESS_CALL TessBaseAPICreate();
TESS_API void TESS_CALL TessBaseAPIDelete(TessBaseAPI* handle);
TESS_API size_t TESS_CALL TessBaseAPIGetOpenCLDevice(TessBaseAPI* handle,
void** device);
/* Input/output naming and access to the input image and data path. */
TESS_API void TESS_CALL TessBaseAPISetInputName(TessBaseAPI* handle,
const char* name);
TESS_API const char* TESS_CALL TessBaseAPIGetInputName(TessBaseAPI* handle);
TESS_API void TESS_CALL TessBaseAPISetInputImage(TessBaseAPI* handle,
struct Pix* pix);
TESS_API struct Pix* TESS_CALL TessBaseAPIGetInputImage(TessBaseAPI* handle);
TESS_API int TESS_CALL TessBaseAPIGetSourceYResolution(TessBaseAPI* handle);
TESS_API const char* TESS_CALL TessBaseAPIGetDatapath(TessBaseAPI* handle);
TESS_API void TESS_CALL TessBaseAPISetOutputName(TessBaseAPI* handle,
const char* name);
/* Engine variables are addressed by name; setters take the value as text.
 * NOTE(review): the BOOL result presumably reports whether `name` was
 * recognised - confirm. */
TESS_API BOOL TESS_CALL TessBaseAPISetVariable(TessBaseAPI* handle,
const char* name,
const char* value);
TESS_API BOOL TESS_CALL TessBaseAPISetDebugVariable(TessBaseAPI* handle,
const char* name,
const char* value);
/* Typed getters write the value through their last argument; the string
 * getter returns the text directly instead. */
TESS_API BOOL TESS_CALL TessBaseAPIGetIntVariable(const TessBaseAPI* handle,
const char* name, int* value);
TESS_API BOOL TESS_CALL TessBaseAPIGetBoolVariable(const TessBaseAPI* handle,
const char* name,
BOOL* value);
TESS_API BOOL TESS_CALL TessBaseAPIGetDoubleVariable(const TessBaseAPI* handle,
const char* name,
double* value);
TESS_API const char* TESS_CALL
TessBaseAPIGetStringVariable(const TessBaseAPI* handle, const char* name);
/* Dump all variables either to an already open FILE or to a named file. */
TESS_API void TESS_CALL TessBaseAPIPrintVariables(const TessBaseAPI* handle,
FILE* fp);
TESS_API BOOL TESS_CALL TessBaseAPIPrintVariablesToFile(
const TessBaseAPI* handle, const char* filename);
// C++ mode only: both signatures use STRING, which this header never
// declares for C callers. TessBaseAPIInit4 is the char**-based variant
// available in both modes.
#ifdef TESS_CAPI_INCLUDE_BASEAPI
TESS_API BOOL TESS_CALL TessBaseAPIGetVariableAsString(TessBaseAPI* handle,
const char* name,
STRING* val);
TESS_API int TESS_CALL TessBaseAPIInit(
TessBaseAPI* handle, const char* datapath, const char* language,
TessOcrEngineMode mode, char** configs, int configs_size,
const STRING* vars_vec, size_t vars_vec_size, const STRING* vars_values,
size_t vars_values_size, BOOL set_only_init_params);
#endif // def TESS_CAPI_INCLUDE_BASEAPI
/* Initialisers usable from C. Init1 takes the engine mode plus a configs
 * array, Init2 drops the configs, Init3 drops the engine mode as well, and
 * Init4 is the full form with variable names/values passed as char** arrays
 * that share the single length vars_vec_size.
 * NOTE(review): the int result is presumably 0 on success - confirm. */
TESS_API int TESS_CALL TessBaseAPIInit1(TessBaseAPI* handle,
const char* datapath,
const char* language,
TessOcrEngineMode oem, char** configs,
int configs_size);
TESS_API int TESS_CALL TessBaseAPIInit2(TessBaseAPI* handle,
const char* datapath,
const char* language,
TessOcrEngineMode oem);
TESS_API int TESS_CALL TessBaseAPIInit3(TessBaseAPI* handle,
const char* datapath,
const char* language);
TESS_API int TESS_CALL TessBaseAPIInit4(
TessBaseAPI* handle, const char* datapath, const char* language,
TessOcrEngineMode mode, char** configs, int configs_size, char** vars_vec,
char** vars_values, size_t vars_vec_size, BOOL set_only_non_debug_params);
/* Language queries.
 * NOTE(review): the char** results presumably have to be released with
 * TessDeleteTextArray - confirm. */
TESS_API const char* TESS_CALL
TessBaseAPIGetInitLanguagesAsString(const TessBaseAPI* handle);
TESS_API char** TESS_CALL
TessBaseAPIGetLoadedLanguagesAsVector(const TessBaseAPI* handle);
TESS_API char** TESS_CALL
TessBaseAPIGetAvailableLanguagesAsVector(const TessBaseAPI* handle);
TESS_API int TESS_CALL TessBaseAPIInitLangMod(TessBaseAPI* handle,
const char* datapath,
const char* language);
TESS_API void TESS_CALL TessBaseAPIInitForAnalysePage(TessBaseAPI* handle);
/* Configuration files are read by file name. */
TESS_API void TESS_CALL TessBaseAPIReadConfigFile(TessBaseAPI* handle,
const char* filename);
TESS_API void TESS_CALL TessBaseAPIReadDebugConfigFile(TessBaseAPI* handle,
const char* filename);
/* Page segmentation mode accessors; values come from TessPageSegMode. */
TESS_API void TESS_CALL TessBaseAPISetPageSegMode(TessBaseAPI* handle,
TessPageSegMode mode);
TESS_API TessPageSegMode TESS_CALL
TessBaseAPIGetPageSegMode(const TessBaseAPI* handle);
/* Image input. TessBaseAPIRect takes a raw pixel buffer, its layout and a
 * rectangle, and returns text.
 * NOTE(review): the returned char* is presumably owned by the caller and
 * released with TessDeleteText - confirm. */
TESS_API char* TESS_CALL TessBaseAPIRect(TessBaseAPI* handle,
const unsigned char* imagedata,
int bytes_per_pixel,
int bytes_per_line, int left, int top,
int width, int height);
TESS_API void TESS_CALL TessBaseAPIClearAdaptiveClassifier(TessBaseAPI* handle);
/* SetImage takes a raw buffer described by width, height, bytes per pixel and
 * bytes per line; SetImage2 takes a Pix instead. */
TESS_API void TESS_CALL TessBaseAPISetImage(TessBaseAPI* handle,
const unsigned char* imagedata,
int width, int height,
int bytes_per_pixel,
int bytes_per_line);
TESS_API void TESS_CALL TessBaseAPISetImage2(TessBaseAPI* handle,
struct Pix* pix);
/* Resolution in pixels per inch; the project's ImageTextDirectOCR passes the
 * mean of the image's x and y DPI. */
TESS_API void TESS_CALL TessBaseAPISetSourceResolution(TessBaseAPI* handle,
int ppi);
TESS_API void TESS_CALL TessBaseAPISetRectangle(TessBaseAPI* handle, int left,
int top, int width, int height);
/* C++ mode only: TessImageThresholder is not declared for C callers. */
#ifdef TESS_CAPI_INCLUDE_BASEAPI
TESS_API void TESS_CALL TessBaseAPISetThresholder(
TessBaseAPI* handle, TessImageThresholder* thresholder);
#endif
/* Layout queries. Most return a Boxa* and take additional out-pointers
 * (Pixa**, int**) for per-component images and ids.
 * NOTE(review): ownership of the returned Pix/Boxa/Pixa and of the int
 * arrays is not visible here - confirm before freeing. */
TESS_API struct Pix* TESS_CALL
TessBaseAPIGetThresholdedImage(TessBaseAPI* handle);
TESS_API struct Boxa* TESS_CALL TessBaseAPIGetRegions(TessBaseAPI* handle,
struct Pixa** pixa);
TESS_API struct Boxa* TESS_CALL TessBaseAPIGetTextlines(TessBaseAPI* handle,
struct Pixa** pixa,
int** blockids);
/* The ...1 variants add raw_image / raw_padding and a paraids output. */
TESS_API struct Boxa* TESS_CALL
TessBaseAPIGetTextlines1(TessBaseAPI* handle, BOOL raw_image, int raw_padding,
struct Pixa** pixa, int** blockids, int** paraids);
TESS_API struct Boxa* TESS_CALL TessBaseAPIGetStrips(TessBaseAPI* handle,
struct Pixa** pixa,
int** blockids);
TESS_API struct Boxa* TESS_CALL TessBaseAPIGetWords(TessBaseAPI* handle,
struct Pixa** pixa);
TESS_API struct Boxa* TESS_CALL
TessBaseAPIGetConnectedComponents(TessBaseAPI* handle, struct Pixa** cc);
TESS_API struct Boxa* TESS_CALL TessBaseAPIGetComponentImages(
TessBaseAPI* handle, TessPageIteratorLevel level, BOOL text_only,
struct Pixa** pixa, int** blockids);
TESS_API struct Boxa* TESS_CALL TessBaseAPIGetComponentImages1(
TessBaseAPI* handle, TessPageIteratorLevel level, BOOL text_only,
BOOL raw_image, int raw_padding, struct Pixa** pixa, int** blockids,
int** paraids);
TESS_API int TESS_CALL
TessBaseAPIGetThresholdedImageScaleFactor(const TessBaseAPI* handle);
/* Returns a page iterator over the analysed layout. The project's earlier
 * ImageTextDirectOCR code treated a NULL result as failure and released the
 * iterator with TessPageIteratorDelete. */
TESS_API TessPageIterator* TESS_CALL
TessBaseAPIAnalyseLayout(TessBaseAPI* handle);
/* Recognition entry points. `monitor` is a progress monitor as created by
 * TessMonitorCreate.
 * NOTE(review): the int result is presumably 0 on success - confirm. */
TESS_API int TESS_CALL TessBaseAPIRecognize(TessBaseAPI* handle,
ETEXT_DESC* monitor);
#ifndef DISABLED_LEGACY_ENGINE
TESS_API int TESS_CALL TessBaseAPIRecognizeForChopTest(TessBaseAPI* handle,
ETEXT_DESC* monitor);
#endif
/* Whole-file / single-page processing that feeds a renderer. ProcessPage
 * takes the page as a Pix plus its index; both accept a retry config name
 * and a timeout in milliseconds. */
TESS_API BOOL TESS_CALL TessBaseAPIProcessPages(TessBaseAPI* handle,
const char* filename,
const char* retry_config,
int timeout_millisec,
TessResultRenderer* renderer);
TESS_API BOOL TESS_CALL TessBaseAPIProcessPage(TessBaseAPI* handle,
struct Pix* pix, int page_index,
const char* filename,
const char* retry_config,
int timeout_millisec,
TessResultRenderer* renderer);
/* Iterators over the recognition result; matching Delete functions are
 * declared in the iterator sections of this header. */
TESS_API TessResultIterator* TESS_CALL
TessBaseAPIGetIterator(TessBaseAPI* handle);
TESS_API TessMutableIterator* TESS_CALL
TessBaseAPIGetMutableIterator(TessBaseAPI* handle);
/* Text output in several formats.
 * NOTE(review): the non-const char* results are presumably caller-owned and
 * released with TessDeleteText - confirm. */
TESS_API char* TESS_CALL TessBaseAPIGetUTF8Text(TessBaseAPI* handle);
TESS_API char* TESS_CALL TessBaseAPIGetHOCRText(TessBaseAPI* handle,
int page_number);
TESS_API char* TESS_CALL TessBaseAPIGetAltoText(TessBaseAPI* handle,
int page_number);
TESS_API char* TESS_CALL TessBaseAPIGetTsvText(TessBaseAPI* handle,
int page_number);
TESS_API char* TESS_CALL TessBaseAPIGetBoxText(TessBaseAPI* handle,
int page_number);
TESS_API char* TESS_CALL TessBaseAPIGetLSTMBoxText(TessBaseAPI* handle,
int page_number);
TESS_API char* TESS_CALL TessBaseAPIGetWordStrBoxText(TessBaseAPI* handle,
int page_number);
TESS_API char* TESS_CALL TessBaseAPIGetUNLVText(TessBaseAPI* handle);
/* Confidence figures.
 * NOTE(review): the int* array is presumably released with
 * TessDeleteIntArray - confirm. */
TESS_API int TESS_CALL TessBaseAPIMeanTextConf(TessBaseAPI* handle);
TESS_API int* TESS_CALL TessBaseAPIAllWordConfidences(TessBaseAPI* handle);
#ifndef DISABLED_LEGACY_ENGINE
TESS_API BOOL TESS_CALL TessBaseAPIAdaptToWordStr(TessBaseAPI* handle,
TessPageSegMode mode,
const char* wordstr);
#endif // ndef DISABLED_LEGACY_ENGINE
/* Clear and End both take only the handle; how much state each one drops is
 * not visible from the declarations. */
TESS_API void TESS_CALL TessBaseAPIClear(TessBaseAPI* handle);
TESS_API void TESS_CALL TessBaseAPIEnd(TessBaseAPI* handle);
TESS_API int TESS_CALL TessBaseAPIIsValidWord(TessBaseAPI* handle,
const char* word);
/* Writes a text-line offset and slope through the two out-pointers. */
TESS_API BOOL TESS_CALL TessBaseAPIGetTextDirection(TessBaseAPI* handle,
int* out_offset,
float* out_slope);
#ifdef TESS_CAPI_INCLUDE_BASEAPI
TESS_API void TESS_CALL TessBaseAPISetDictFunc(TessBaseAPI* handle,
TessDictFunc f);
TESS_API void TESS_CALL TessBaseAPIClearPersistentCache(TessBaseAPI* handle);
TESS_API void TESS_CALL TessBaseAPISetProbabilityInContextFunc(
TessBaseAPI* handle, TessProbabilityInContextFunc f);
// Orientation and script detection: results are written through the four
// out-pointers (orientation in degrees with its confidence, script name with
// its confidence).
// Original note: call TessDeleteText(*best_script_name) to free memory
// allocated by this function.
// NOTE(review): no parameter is named best_script_name; the note presumably
// means *script_name. Confirm the ownership rule against the implementation
// before freeing.
TESS_API BOOL TESS_CALL TessBaseAPIDetectOrientationScript(
TessBaseAPI* handle, int* orient_deg, float* orient_conf,
const char** script_name, float* script_conf);
#endif // def TESS_CAPI_INCLUDE_BASEAPI
TESS_API const char* TESS_CALL TessBaseAPIGetUnichar(TessBaseAPI* handle,
int unichar_id);
TESS_API void TESS_CALL TessBaseAPISetMinOrientationMargin(TessBaseAPI* handle,
double margin);
/* C++ mode only: TessDawg and TessTruthCallback are not declared for C
 * callers. */
#ifdef TESS_CAPI_INCLUDE_BASEAPI
TESS_API const TessDawg* TESS_CALL TessBaseAPIGetDawg(const TessBaseAPI* handle,
int i);
TESS_API int TESS_CALL TessBaseAPINumDawgs(const TessBaseAPI* handle);
TESS_API TessOcrEngineMode TESS_CALL TessBaseAPIOem(const TessBaseAPI* handle);
TESS_API void TESS_CALL TessBaseAPIInitTruthCallback(TessBaseAPI* handle,
TessTruthCallback* cb);
/* Named TessBase... rather than TessBaseAPI... like its neighbours; this is
 * the exported name, so it is left as is. */
TESS_API void TESS_CALL TessBaseGetBlockTextOrientations(
TessBaseAPI* handle, int** block_orientation, bool** vertical_writing);
#endif
/* Page iterator */
/* Delete releases an iterator; Copy returns a new iterator handle made from
 * an existing one. */
TESS_API void TESS_CALL TessPageIteratorDelete(TessPageIterator* handle);
TESS_API TessPageIterator* TESS_CALL
TessPageIteratorCopy(const TessPageIterator* handle);
/* Traversal: Begin rewinds, Next advances at the given level.
 * NOTE(review): Next presumably returns FALSE once the end is reached -
 * confirm. */
TESS_API void TESS_CALL TessPageIteratorBegin(TessPageIterator* handle);
TESS_API BOOL TESS_CALL TessPageIteratorNext(TessPageIterator* handle,
TessPageIteratorLevel level);
TESS_API BOOL TESS_CALL TessPageIteratorIsAtBeginningOf(
const TessPageIterator* handle, TessPageIteratorLevel level);
TESS_API BOOL TESS_CALL TessPageIteratorIsAtFinalElement(
const TessPageIterator* handle, TessPageIteratorLevel level,
TessPageIteratorLevel element);
/* Geometry of the current element, written through the int out-pointers. */
TESS_API BOOL TESS_CALL TessPageIteratorBoundingBox(
const TessPageIterator* handle, TessPageIteratorLevel level, int* left,
int* top, int* right, int* bottom);
TESS_API TessPolyBlockType TESS_CALL
TessPageIteratorBlockType(const TessPageIterator* handle);
/* Image extraction for the current element.
 * NOTE(review): ownership of the returned Pix is not visible here -
 * confirm. */
TESS_API struct Pix* TESS_CALL TessPageIteratorGetBinaryImage(
const TessPageIterator* handle, TessPageIteratorLevel level);
TESS_API struct Pix* TESS_CALL TessPageIteratorGetImage(
const TessPageIterator* handle, TessPageIteratorLevel level, int padding,
struct Pix* original_image, int* left, int* top);
TESS_API BOOL TESS_CALL TessPageIteratorBaseline(const TessPageIterator* handle,
TessPageIteratorLevel level,
int* x1, int* y1, int* x2,
int* y2);
/* Fills orientation, writing direction, text-line order and deskew angle.
 * This is what the project's ImageTextDirectOCR used for direction detection
 * before MyOSD was introduced. */
TESS_API void TESS_CALL TessPageIteratorOrientation(
TessPageIterator* handle, TessOrientation* orientation,
TessWritingDirection* writing_direction, TessTextlineOrder* textline_order,
float* deskew_angle);
TESS_API void TESS_CALL TessPageIteratorParagraphInfo(
TessPageIterator* handle, TessParagraphJustification* justification,
BOOL* is_list_item, BOOL* is_crown, int* first_line_indent);
/* Result iterator */
TESS_API void TESS_CALL TessResultIteratorDelete(TessResultIterator* handle);
TESS_API TessResultIterator* TESS_CALL
TessResultIteratorCopy(const TessResultIterator* handle);
/* Views of a result iterator as a page iterator (mutable and const), and a
 * choice iterator created from the current position. */
TESS_API TessPageIterator* TESS_CALL
TessResultIteratorGetPageIterator(TessResultIterator* handle);
TESS_API const TessPageIterator* TESS_CALL
TessResultIteratorGetPageIteratorConst(const TessResultIterator* handle);
TESS_API TessChoiceIterator* TESS_CALL
TessResultIteratorGetChoiceIterator(const TessResultIterator* handle);
TESS_API BOOL TESS_CALL TessResultIteratorNext(TessResultIterator* handle,
TessPageIteratorLevel level);
/* Text and confidence of the current element at the requested level.
 * NOTE(review): the char* result is presumably caller-owned and released
 * with TessDeleteText - confirm. */
TESS_API char* TESS_CALL TessResultIteratorGetUTF8Text(
const TessResultIterator* handle, TessPageIteratorLevel level);
TESS_API float TESS_CALL TessResultIteratorConfidence(
const TessResultIterator* handle, TessPageIteratorLevel level);
TESS_API const char* TESS_CALL
TessResultIteratorWordRecognitionLanguage(const TessResultIterator* handle);
/* Returns a const string and fills the font attribute flags, point size and
 * font id through the out-pointers. */
TESS_API const char* TESS_CALL TessResultIteratorWordFontAttributes(
const TessResultIterator* handle, BOOL* is_bold, BOOL* is_italic,
BOOL* is_underlined, BOOL* is_monospace, BOOL* is_serif, BOOL* is_smallcaps,
int* pointsize, int* font_id);
/* Per-word and per-symbol predicates on the current position. */
TESS_API BOOL TESS_CALL
TessResultIteratorWordIsFromDictionary(const TessResultIterator* handle);
TESS_API BOOL TESS_CALL
TessResultIteratorWordIsNumeric(const TessResultIterator* handle);
TESS_API BOOL TESS_CALL
TessResultIteratorSymbolIsSuperscript(const TessResultIterator* handle);
TESS_API BOOL TESS_CALL
TessResultIteratorSymbolIsSubscript(const TessResultIterator* handle);
TESS_API BOOL TESS_CALL
TessResultIteratorSymbolIsDropcap(const TessResultIterator* handle);
/* Choice iterator: obtained from TessResultIteratorGetChoiceIterator and
 * released with TessChoiceIteratorDelete. The text getter returns a const
 * string, unlike the result-iterator text getter. */
TESS_API void TESS_CALL TessChoiceIteratorDelete(TessChoiceIterator* handle);
TESS_API BOOL TESS_CALL TessChoiceIteratorNext(TessChoiceIterator* handle);
TESS_API const char* TESS_CALL
TessChoiceIteratorGetUTF8Text(const TessChoiceIterator* handle);
TESS_API float TESS_CALL
TessChoiceIteratorConfidence(const TessChoiceIterator* handle);
/* Progress monitor */
/* Create/Delete pair for the ETEXT_DESC object that TessBaseAPIRecognize
 * accepts as its `monitor` argument. */
TESS_API ETEXT_DESC* TESS_CALL TessMonitorCreate();
TESS_API void TESS_CALL TessMonitorDelete(ETEXT_DESC* monitor);
/* Cancellation: a callback plus an opaque pointer that can be set and read
 * back; the callback's first parameter is named cancel_this to match. */
TESS_API void TESS_CALL TessMonitorSetCancelFunc(ETEXT_DESC* monitor,
TessCancelFunc cancelFunc);
TESS_API void TESS_CALL TessMonitorSetCancelThis(ETEXT_DESC* monitor,
void* cancelThis);
TESS_API void* TESS_CALL TessMonitorGetCancelThis(ETEXT_DESC* monitor);
/* Progress reporting and deadline.
 * NOTE(review): the unit/range of the progress value is not visible here;
 * the deadline is in milliseconds according to the function name. */
TESS_API void TESS_CALL
TessMonitorSetProgressFunc(ETEXT_DESC* monitor, TessProgressFunc progressFunc);
TESS_API int TESS_CALL TessMonitorGetProgress(ETEXT_DESC* monitor);
TESS_API void TESS_CALL TessMonitorSetDeadlineMSecs(ETEXT_DESC* monitor,
int deadline);
// Helpers that exist only when the legacy engine is compiled in AND the
// header is used in C++ mode: the signatures use internal types (TBLOB, ROW,
// BLOCK_LIST, INT_FEATURE_STRUCT) and spell Pix without `struct`, neither of
// which is available to C callers of this header.
#ifndef DISABLED_LEGACY_ENGINE
# ifdef TESS_CAPI_INCLUDE_BASEAPI
TESS_API void TESS_CALL TessBaseAPISetFillLatticeFunc(TessBaseAPI* handle,
TessFillLatticeFunc f);
TESS_API void TESS_CALL TessBaseAPIGetFeaturesForBlob(
TessBaseAPI* handle, TBLOB* blob, INT_FEATURE_STRUCT* int_features,
int* num_features, int* FeatureOutlineIndex);
TESS_API ROW* TESS_CALL TessFindRowForBox(BLOCK_LIST* blocks, int left, int top,
int right, int bottom);
TESS_API void TESS_CALL TessBaseAPIRunAdaptiveClassifier(
TessBaseAPI* handle, TBLOB* blob, int num_max_matches, int* unichar_ids,
float* ratings, int* num_matches_returned);
TESS_API ROW* TESS_CALL TessMakeTessOCRRow(float baseline, float xheight,
float descender, float ascender);
TESS_API TBLOB* TESS_CALL TessMakeTBLOB(Pix* pix);
TESS_API void TESS_CALL TessNormalizeTBLOB(TBLOB* tblob, ROW* row,
BOOL numeric_mode);
// Create/Delete pair for a BLOCK_LIST built from the handle's current image.
TESS_API BLOCK_LIST* TESS_CALL
TessBaseAPIFindLinesCreateBlockList(TessBaseAPI* handle);
TESS_API void TESS_CALL TessDeleteBlockList(BLOCK_LIST* block_list);
# endif // def TESS_CAPI_INCLUDE_BASEAPI
#endif // ndef DISABLED_LEGACY_ENGINE
#ifdef __cplusplus
}
#endif
#endif // API_CAPI_H_

View File

@ -93,6 +93,8 @@ class TESS_API TessBaseAPI {
TessBaseAPI(); TessBaseAPI();
virtual ~TessBaseAPI(); virtual ~TessBaseAPI();
int MyOSD();
/** /**
* Returns the version identifier as a static string. Do not delete. * Returns the version identifier as a static string. Do not delete.
*/ */

View File

@ -176,6 +176,8 @@ struct Pix;
struct Boxa; struct Boxa;
struct Pixa; struct Pixa;
TESS_API int MyOSD(TessBaseAPI* api);
/* General free functions */ /* General free functions */
TESS_API const char* TESS_CALL TessVersion(); TESS_API const char* TESS_CALL TessVersion();