diff --git a/modules/imgproc/HGOCRTesseract.cpp b/modules/imgproc/HGOCRTesseract.cpp index b8f14b3b..60903424 100644 --- a/modules/imgproc/HGOCRTesseract.cpp +++ b/modules/imgproc/HGOCRTesseract.cpp @@ -171,20 +171,7 @@ HGResult HGOCRTesseract::ImageTextDirectOCR(HGImage image, HGUInt* direct) HGBase_GetImageDpi(image2, &xDpi, &yDpi); TessBaseAPISetSourceResolution(m_baseApi, (xDpi + yDpi) / 2); - TessPageIterator* iter = TessBaseAPIAnalyseLayout(m_baseApi); - if (NULL == iter) - { - if (image2 != image) - HGBase_DestroyImage(image2); - return HGIMGPROC_ERR_OCR; - } - - TessOrientation orientation; - TessWritingDirection writing_direction; - TessTextlineOrder textline_order; - float deskew_angle; - TessPageIteratorOrientation(iter, &orientation, &writing_direction, &textline_order, &deskew_angle); - + int orientation = MyOSD(m_baseApi); if (TessOrientation::ORIENTATION_PAGE_UP == orientation) *direct = HGIMGPROC_OCRTEXTDIRECT_ORI; else if (TessOrientation::ORIENTATION_PAGE_RIGHT == orientation) @@ -194,7 +181,6 @@ HGResult HGOCRTesseract::ImageTextDirectOCR(HGImage image, HGUInt* direct) else if (TessOrientation::ORIENTATION_PAGE_LEFT == orientation) *direct = HGIMGPROC_OCRTEXTDIRECT_LEFT; - TessPageIteratorDelete(iter); if (image2 != image) HGBase_DestroyImage(image2); return HGBASE_ERR_OK; diff --git a/third_party/ocr/tesseract-ocr/src/baseapi.cpp b/third_party/ocr/tesseract-ocr/src/baseapi.cpp new file mode 100644 index 00000000..66640c52 --- /dev/null +++ b/third_party/ocr/tesseract-ocr/src/baseapi.cpp @@ -0,0 +1,2696 @@ +/********************************************************************** + * File: baseapi.cpp + * Description: Simple API for calling tesseract. + * Author: Ray Smith + * + * (C) Copyright 2006, Google Inc. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. 
+ ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +#define _USE_MATH_DEFINES // for M_PI + +// Include automatically generated configuration file if running autoconf. +#ifdef HAVE_CONFIG_H +#include "config_auto.h" +#endif + +#include "baseapi.h" +#ifdef __linux__ +#include // for sigaction, SA_RESETHAND, SIGBUS, SIGFPE +#endif + +#if defined(_WIN32) +#include +#include +#else +#include // for closedir, opendir, readdir, DIR, dirent +#include +#include +#include // for stat, S_IFDIR +#include +#endif // _WIN32 + +#include // for round, M_PI +#include // for int32_t +#include // for strcmp, strcpy +#include // for size_t +#include // for std::cin +#include // for std::locale::classic +#include // for std::unique_ptr +#include // for std::pair +#include // for std::stringstream +#include // for std::vector +#ifdef HAVE_LIBCURL +#include +#endif +#include "allheaders.h" // for pixDestroy, boxCreate, boxaAddBox, box... 
+#ifndef DISABLED_LEGACY_ENGINE +#include "blobclass.h" // for ExtractFontName +#endif +#include "boxword.h" // for BoxWord +#include "config_auto.h" // for PACKAGE_VERSION +#include "coutln.h" // for C_OUTLINE_IT, C_OUTLINE_LIST +#include "dawg_cache.h" // for DawgCache +#include "dict.h" // for Dict +#include "edgblob.h" // for extract_edges +#include "elst.h" // for ELIST_ITERATOR, ELISTIZE, ELISTIZEH +#include "environ.h" // for l_uint8 +#include "equationdetect.h" // for EquationDetect +#include "errcode.h" // for ASSERT_HOST +#include "helpers.h" // for IntCastRounded, chomp_string +#include "imageio.h" // for IFF_TIFF_G4, IFF_TIFF, IFF_TIFF_G3, ... +#ifndef DISABLED_LEGACY_ENGINE +#include "intfx.h" // for INT_FX_RESULT_STRUCT +#endif +#include "mutableiterator.h" // for MutableIterator +#include "normalis.h" // for kBlnBaselineOffset, kBlnXHeight +#include "ocrclass.h" // for ETEXT_DESC +#if defined(USE_OPENCL) +#include "openclwrapper.h" // for OpenclDevice +#endif +#include "osdetect.h" // for OSResults, OSBestResult, OrientationId... +#include "pageres.h" // for PAGE_RES_IT, WERD_RES, PAGE_RES, CR_DE... +#include "paragraphs.h" // for DetectParagraphs +#include "params.h" // for BoolParam, IntParam, DoubleParam, Stri... 
+#include "pdblock.h" // for PDBLK +#include "points.h" // for FCOORD +#include "polyblk.h" // for POLY_BLOCK +#include "rect.h" // for TBOX +#include "renderer.h" // for TessResultRenderer +#include "resultiterator.h" // for ResultIterator +#include "stepblob.h" // for C_BLOB_IT, C_BLOB, C_BLOB_LIST +#include "strngs.h" // for STRING +#include "tessdatamanager.h" // for TessdataManager, kTrainedDataSuffix +#include "tesseractclass.h" // for Tesseract +#include "thresholder.h" // for ImageThresholder +#include "tprintf.h" // for tprintf +#include "werd.h" // for WERD, WERD_IT, W_FUZZY_NON, W_FUZZY_SP + +static BOOL_VAR(stream_filelist, false, "Stream a filelist from stdin"); +static STRING_VAR(document_title, "", "Title of output document (used for hOCR and PDF output)"); + +namespace tesseract { + +/** Minimum sensible image size to be worth running tesseract. */ +const int kMinRectSize = 10; +/** Character returned when Tesseract couldn't recognize as anything. */ +const char kTesseractReject = '~'; +/** Character used by UNLV error counter as a reject. */ +const char kUNLVReject = '~'; +/** Character used by UNLV as a suspect marker. */ +const char kUNLVSuspect = '^'; +/** + * Filename used for input image file, from which to derive a name to search + * for a possible UNLV zone file, if none is specified by SetInputName. + */ +static const char* kInputFile = "noname.tif"; +/** + * Temp file used for storing current parameters before applying retry values. + */ +static const char* kOldVarsFile = "failed_vars.txt"; +/** Max string length of an int. */ +const int kMaxIntSize = 22; + +/* Add all available languages recursively. +*/ +static void addAvailableLanguages(const STRING &datadir, const STRING &base, + GenericVector* langs) +{ + const STRING base2 = (base.string()[0] == '\0') ? 
base : base + "/"; + const size_t extlen = sizeof(kTrainedDataSuffix); +#ifdef _WIN32 + WIN32_FIND_DATA data; + HANDLE handle = FindFirstFile((datadir + base2 + "*").string(), &data); + if (handle != INVALID_HANDLE_VALUE) { + BOOL result = TRUE; + for (; result;) { + char *name = data.cFileName; + // Skip '.', '..', and hidden files + if (name[0] != '.') { + if ((data.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) == + FILE_ATTRIBUTE_DIRECTORY) { + addAvailableLanguages(datadir, base2 + name, langs); + } else { + size_t len = strlen(name); + if (len > extlen && name[len - extlen] == '.' && + strcmp(&name[len - extlen + 1], kTrainedDataSuffix) == 0) { + name[len - extlen] = '\0'; + langs->push_back(base2 + name); + } + } + } + result = FindNextFile(handle, &data); + } + FindClose(handle); + } +#else // _WIN32 + DIR* dir = opendir((datadir + base).string()); + if (dir != nullptr) { + dirent *de; + while ((de = readdir(dir))) { + char *name = de->d_name; + // Skip '.', '..', and hidden files + if (name[0] != '.') { + struct stat st; + if (stat((datadir + base2 + name).string(), &st) == 0 && + (st.st_mode & S_IFDIR) == S_IFDIR) { + addAvailableLanguages(datadir, base2 + name, langs); + } else { + size_t len = strlen(name); + if (len > extlen && name[len - extlen] == '.' && + strcmp(&name[len - extlen + 1], kTrainedDataSuffix) == 0) { + name[len - extlen] = '\0'; + langs->push_back(base2 + name); + } + } + } + } + closedir(dir); + } +#endif +} + +// Compare two STRING values (used for sorting). 
+static int CompareSTRING(const void* p1, const void* p2) { + const auto* s1 = static_cast(p1); + const auto* s2 = static_cast(p2); + return strcmp(s1->c_str(), s2->c_str()); +} + +TessBaseAPI::TessBaseAPI() + : tesseract_(nullptr), + osd_tesseract_(nullptr), + equ_detect_(nullptr), + reader_(nullptr), + // Thresholder is initialized to nullptr here, but will be set before use by: + // A constructor of a derived API, SetThresholder(), or + // created implicitly when used in InternalSetImage. + thresholder_(nullptr), + paragraph_models_(nullptr), + block_list_(nullptr), + page_res_(nullptr), + input_file_(nullptr), + output_file_(nullptr), + datapath_(nullptr), + language_(nullptr), + last_oem_requested_(OEM_DEFAULT), + recognition_done_(false), + truth_cb_(nullptr), + rect_left_(0), + rect_top_(0), + rect_width_(0), + rect_height_(0), + image_width_(0), + image_height_(0) { +#if defined(DEBUG) + // The Tesseract executables would use the "C" locale by default, + // but other software which is linked against the Tesseract library + // typically uses the locale from the user's environment. + // Here the default is overridden to allow debugging of potential + // problems caused by the locale settings. + + // Use the current locale if building debug code. + std::locale::global(std::locale("")); +#endif +} + +TessBaseAPI::~TessBaseAPI() { + End(); +} + +int TessBaseAPI::MyOSD() { + return FindLines(); +} + +//int TessBaseAPI::AnalyseLayout() { +// return AnalyseLayout(false); +//} + +/** + * Returns the version identifier as a static string. Do not delete. + */ +const char* TessBaseAPI::Version() { + return PACKAGE_VERSION; +} + +/** + * If compiled with OpenCL AND an available OpenCL + * device is deemed faster than serial code, then + * "device" is populated with the cl_device_id + * and returns sizeof(cl_device_id) + * otherwise *device=nullptr and returns 0. 
+ */ +size_t TessBaseAPI::getOpenCLDevice(void **data) { +#ifdef USE_OPENCL + ds_device device = OpenclDevice::getDeviceSelection(); + if (device.type == DS_DEVICE_OPENCL_DEVICE) { + *data = new cl_device_id; + memcpy(*data, &device.oclDeviceID, sizeof(cl_device_id)); + return sizeof(cl_device_id); + } +#endif + + *data = nullptr; + return 0; +} + +/** + * This method used to write the thresholded image to stderr as a PBM file + * on receipt of a SIGSEGV, SIGFPE, or SIGBUS signal. (Linux/Unix only). + */ +void TessBaseAPI::CatchSignals() { + // Warn API users that an implementation is needed. + tprintf("Deprecated method CatchSignals has only a dummy implementation!\n"); +} + +/** + * Set the name of the input file. Needed only for training and + * loading a UNLV zone file. + */ +void TessBaseAPI::SetInputName(const char* name) { + if (input_file_ == nullptr) + input_file_ = new STRING(name); + else + *input_file_ = name; +} + +/** Set the name of the output files. Needed only for debugging. 
*/ +void TessBaseAPI::SetOutputName(const char* name) { + if (output_file_ == nullptr) + output_file_ = new STRING(name); + else + *output_file_ = name; +} + +bool TessBaseAPI::SetVariable(const char* name, const char* value) { + if (tesseract_ == nullptr) tesseract_ = new Tesseract; + return ParamUtils::SetParam(name, value, SET_PARAM_CONSTRAINT_NON_INIT_ONLY, + tesseract_->params()); +} + +bool TessBaseAPI::SetDebugVariable(const char* name, const char* value) { + if (tesseract_ == nullptr) tesseract_ = new Tesseract; + return ParamUtils::SetParam(name, value, SET_PARAM_CONSTRAINT_DEBUG_ONLY, + tesseract_->params()); +} + +bool TessBaseAPI::GetIntVariable(const char *name, int *value) const { + auto *p = ParamUtils::FindParam( + name, GlobalParams()->int_params, tesseract_->params()->int_params); + if (p == nullptr) return false; + *value = (int32_t)(*p); + return true; +} + +bool TessBaseAPI::GetBoolVariable(const char *name, bool *value) const { + auto *p = ParamUtils::FindParam( + name, GlobalParams()->bool_params, tesseract_->params()->bool_params); + if (p == nullptr) return false; + *value = bool(*p); + return true; +} + +const char *TessBaseAPI::GetStringVariable(const char *name) const { + auto *p = ParamUtils::FindParam( + name, GlobalParams()->string_params, tesseract_->params()->string_params); + return (p != nullptr) ? p->string() : nullptr; +} + +bool TessBaseAPI::GetDoubleVariable(const char *name, double *value) const { + auto *p = ParamUtils::FindParam( + name, GlobalParams()->double_params, tesseract_->params()->double_params); + if (p == nullptr) return false; + *value = (double)(*p); + return true; +} + +/** Get value of named variable as a string, if it exists. */ +bool TessBaseAPI::GetVariableAsString(const char *name, STRING *val) { + return ParamUtils::GetParamAsString(name, tesseract_->params(), val); +} + +/** Print Tesseract parameters to the given file. 
*/ +void TessBaseAPI::PrintVariables(FILE *fp) const { + ParamUtils::PrintParams(fp, tesseract_->params()); +} + +/** + * The datapath must be the name of the data directory or + * some other file in which the data directory resides (for instance argv[0].) + * The language is (usually) an ISO 639-3 string or nullptr will default to eng. + * If numeric_mode is true, then only digits and Roman numerals will + * be returned. + * @return: 0 on success and -1 on initialization failure. + */ +int TessBaseAPI::Init(const char* datapath, const char* language, + OcrEngineMode oem, char **configs, int configs_size, + const GenericVector *vars_vec, + const GenericVector *vars_values, + bool set_only_non_debug_params) { + return Init(datapath, 0, language, oem, configs, configs_size, vars_vec, + vars_values, set_only_non_debug_params, nullptr); +} + +// In-memory version reads the traineddata file directly from the given +// data[data_size] array. Also implements the version with a datapath in data, +// flagged by data_size = 0. +int TessBaseAPI::Init(const char* data, int data_size, const char* language, + OcrEngineMode oem, char** configs, int configs_size, + const GenericVector* vars_vec, + const GenericVector* vars_values, + bool set_only_non_debug_params, FileReader reader) { + // Default language is "eng". + if (language == nullptr) language = "eng"; + STRING datapath = data_size == 0 ? data : language; + // If the datapath, OcrEngineMode or the language have changed - start again. + // Note that the language_ field stores the last requested language that was + // initialized successfully, while tesseract_->lang stores the language + // actually used. They differ only if the requested language was nullptr, in + // which case tesseract_->lang is set to the Tesseract default ("eng"). 
+ if (tesseract_ != nullptr && + (datapath_ == nullptr || language_ == nullptr || *datapath_ != datapath || + last_oem_requested_ != oem || + (*language_ != language && tesseract_->lang != language))) { + delete tesseract_; + tesseract_ = nullptr; + } +#ifdef USE_OPENCL + OpenclDevice od; + od.InitEnv(); +#endif + bool reset_classifier = true; + if (tesseract_ == nullptr) { + reset_classifier = false; + tesseract_ = new Tesseract; + if (reader != nullptr) reader_ = reader; + TessdataManager mgr(reader_); + if (data_size != 0) { + mgr.LoadMemBuffer(language, data, data_size); + } + if (tesseract_->init_tesseract( + datapath.string(), + output_file_ != nullptr ? output_file_->string() : nullptr, + language, oem, configs, configs_size, vars_vec, vars_values, + set_only_non_debug_params, &mgr) != 0) { + return -1; + } + } + + // Update datapath and language requested for the last valid initialization. + if (datapath_ == nullptr) + datapath_ = new STRING(datapath); + else + *datapath_ = datapath; + if ((strcmp(datapath_->string(), "") == 0) && + (strcmp(tesseract_->datadir.string(), "") != 0)) + *datapath_ = tesseract_->datadir; + + if (language_ == nullptr) + language_ = new STRING(language); + else + *language_ = language; + last_oem_requested_ = oem; + +#ifndef DISABLED_LEGACY_ENGINE + // For same language and datapath, just reset the adaptive classifier. + if (reset_classifier) { + tesseract_->ResetAdaptiveClassifier(); + } +#endif // ndef DISABLED_LEGACY_ENGINE + return 0; +} + +/** + * Returns the languages string used in the last valid initialization. + * If the last initialization specified "deu+hin" then that will be + * returned. If hin loaded eng automatically as well, then that will + * not be included in this list. To find the languages actually + * loaded use GetLoadedLanguagesAsVector. + * The returned string should NOT be deleted. 
+ */ +const char* TessBaseAPI::GetInitLanguagesAsString() const { + return (language_ == nullptr || language_->string() == nullptr) ? + "" : language_->string(); +} + +/** + * Returns the loaded languages in the vector of STRINGs. + * Includes all languages loaded by the last Init, including those loaded + * as dependencies of other loaded languages. + */ +void TessBaseAPI::GetLoadedLanguagesAsVector( + GenericVector* langs) const { + langs->clear(); + if (tesseract_ != nullptr) { + langs->push_back(tesseract_->lang); + int num_subs = tesseract_->num_sub_langs(); + for (int i = 0; i < num_subs; ++i) + langs->push_back(tesseract_->get_sub_lang(i)->lang); + } +} + +/** + * Returns the available languages in the sorted vector of STRINGs. + */ +void TessBaseAPI::GetAvailableLanguagesAsVector( + GenericVector* langs) const { + langs->clear(); + if (tesseract_ != nullptr) { + addAvailableLanguages(tesseract_->datadir, "", langs); + langs->sort(CompareSTRING); + } +} + +//TODO(amit): Adapt to lstm +#ifndef DISABLED_LEGACY_ENGINE +/** + * Init only the lang model component of Tesseract. The only functions + * that work after this init are SetVariable and IsValidWord. + * WARNING: temporary! This function will be removed from here and placed + * in a separate API at some future time. + */ +int TessBaseAPI::InitLangMod(const char* datapath, const char* language) { + if (tesseract_ == nullptr) + tesseract_ = new Tesseract; + else + ParamUtils::ResetToDefaults(tesseract_->params()); + TessdataManager mgr; + return tesseract_->init_tesseract_lm(datapath, nullptr, language, &mgr); +} +#endif // ndef DISABLED_LEGACY_ENGINE + +/** + * Init only for page layout analysis. Use only for calls to SetImage and + * AnalysePage. Calls that attempt recognition will generate an error. 
+ */ +void TessBaseAPI::InitForAnalysePage() { + if (tesseract_ == nullptr) { + tesseract_ = new Tesseract; + #ifndef DISABLED_LEGACY_ENGINE + tesseract_->InitAdaptiveClassifier(nullptr); + #endif + } +} + +/** + * Read a "config" file containing a set of parameter name, value pairs. + * Searches the standard places: tessdata/configs, tessdata/tessconfigs + * and also accepts a relative or absolute path name. + */ +void TessBaseAPI::ReadConfigFile(const char* filename) { + tesseract_->read_config_file(filename, SET_PARAM_CONSTRAINT_NON_INIT_ONLY); +} + +/** Same as above, but only set debug params from the given config file. */ +void TessBaseAPI::ReadDebugConfigFile(const char* filename) { + tesseract_->read_config_file(filename, SET_PARAM_CONSTRAINT_DEBUG_ONLY); +} + +/** + * Set the current page segmentation mode. Defaults to PSM_AUTO. + * The mode is stored as an IntParam so it can also be modified by + * ReadConfigFile or SetVariable("tessedit_pageseg_mode", mode as string). + */ +void TessBaseAPI::SetPageSegMode(PageSegMode mode) { + if (tesseract_ == nullptr) + tesseract_ = new Tesseract; + tesseract_->tessedit_pageseg_mode.set_value(mode); +} + +/** Return the current page segmentation mode. */ +PageSegMode TessBaseAPI::GetPageSegMode() const { + if (tesseract_ == nullptr) + return PSM_SINGLE_BLOCK; + return static_cast( + static_cast(tesseract_->tessedit_pageseg_mode)); +} + +/** + * Recognize a rectangle from an image and return the result as a string. + * May be called many times for a single Init. + * Currently has no error checking. + * Greyscale of 8 and color of 24 or 32 bits per pixel may be given. + * Palette color images will not work properly and must be converted to + * 24 bit. + * Binary images of 1 bit per pixel may also be given but they must be + * byte packed with the MSB of the first byte being the first pixel, and a + * one pixel is WHITE. For binary images set bytes_per_pixel=0. 
+ * The recognized text is returned as a char* which is coded + * as UTF8 and must be freed with the delete [] operator. + */ +char* TessBaseAPI::TesseractRect(const unsigned char* imagedata, + int bytes_per_pixel, + int bytes_per_line, + int left, int top, + int width, int height) { + if (tesseract_ == nullptr || width < kMinRectSize || height < kMinRectSize) + return nullptr; // Nothing worth doing. + + // Since this original api didn't give the exact size of the image, + // we have to invent a reasonable value. + int bits_per_pixel = bytes_per_pixel == 0 ? 1 : bytes_per_pixel * 8; + SetImage(imagedata, bytes_per_line * 8 / bits_per_pixel, height + top, + bytes_per_pixel, bytes_per_line); + SetRectangle(left, top, width, height); + + return GetUTF8Text(); +} + +#ifndef DISABLED_LEGACY_ENGINE +/** + * Call between pages or documents etc to free up memory and forget + * adaptive data. + */ +void TessBaseAPI::ClearAdaptiveClassifier() { + if (tesseract_ == nullptr) + return; + tesseract_->ResetAdaptiveClassifier(); + tesseract_->ResetDocumentDictionary(); +} +#endif // ndef DISABLED_LEGACY_ENGINE + +/** + * Provide an image for Tesseract to recognize. Format is as + * TesseractRect above. Copies the image buffer and converts to Pix. + * SetImage clears all recognition results, and sets the rectangle to the + * full image, so it may be followed immediately by a GetUTF8Text, and it + * will automatically perform recognition. + */ +void TessBaseAPI::SetImage(const unsigned char* imagedata, + int width, int height, + int bytes_per_pixel, int bytes_per_line) { + if (InternalSetImage()) { + thresholder_->SetImage(imagedata, width, height, + bytes_per_pixel, bytes_per_line); + SetInputImage(thresholder_->GetPixRect()); + } +} + +void TessBaseAPI::SetSourceResolution(int ppi) { + if (thresholder_) + thresholder_->SetSourceYResolution(ppi); + else + tprintf("Please call SetImage before SetSourceResolution.\n"); +} + +/** + * Provide an image for Tesseract to recognize. 
As with SetImage above, + * Tesseract takes its own copy of the image, so it need not persist until + * after Recognize. + * Pix vs raw, which to use? + * Use Pix where possible. Tesseract uses Pix as its internal representation + * and it is therefore more efficient to provide a Pix directly. + */ +void TessBaseAPI::SetImage(Pix* pix) { + if (InternalSetImage()) { + if (pixGetSpp(pix) == 4 && pixGetInputFormat(pix) == IFF_PNG) { + // remove alpha channel from png + Pix* p1 = pixRemoveAlpha(pix); + pixSetSpp(p1, 3); + (void)pixCopy(pix, p1); + pixDestroy(&p1); + } + thresholder_->SetImage(pix); + SetInputImage(thresholder_->GetPixRect()); + } +} + +/** + * Restrict recognition to a sub-rectangle of the image. Call after SetImage. + * Each SetRectangle clears the recognition results so multiple rectangles + * can be recognized with the same image. + */ +void TessBaseAPI::SetRectangle(int left, int top, int width, int height) { + if (thresholder_ == nullptr) + return; + thresholder_->SetRectangle(left, top, width, height); + ClearResults(); +} + +/** + * ONLY available after SetImage if you have Leptonica installed. + * Get a copy of the internal thresholded image from Tesseract. + */ +Pix* TessBaseAPI::GetThresholdedImage() { + if (tesseract_ == nullptr || thresholder_ == nullptr) return nullptr; + if (tesseract_->pix_binary() == nullptr && + !Threshold(tesseract_->mutable_pix_binary())) { + return nullptr; + } + return pixClone(tesseract_->pix_binary()); +} + +/** + * Get the result of page layout analysis as a leptonica-style + * Boxa, Pixa pair, in reading order. + * Can be called before or after Recognize. + */ +Boxa* TessBaseAPI::GetRegions(Pixa** pixa) { + return GetComponentImages(RIL_BLOCK, false, pixa, nullptr); +} + +/** + * Get the textlines as a leptonica-style Boxa, Pixa pair, in reading order. + * Can be called before or after Recognize. + * If blockids is not nullptr, the block-id of each line is also returned as an + * array of one element per line.
delete [] after use. + * If paraids is not nullptr, the paragraph-id of each line within its block is + * also returned as an array of one element per line. delete [] after use. + */ +Boxa* TessBaseAPI::GetTextlines(const bool raw_image, const int raw_padding, + Pixa** pixa, int** blockids, int** paraids) { + return GetComponentImages(RIL_TEXTLINE, true, raw_image, raw_padding, + pixa, blockids, paraids); +} + +/** + * Get textlines and strips of image regions as a leptonica-style Boxa, Pixa + * pair, in reading order. Enables downstream handling of non-rectangular + * regions. + * Can be called before or after Recognize. + * If blockids is not nullptr, the block-id of each line is also returned as an + * array of one element per line. delete [] after use. + */ +Boxa* TessBaseAPI::GetStrips(Pixa** pixa, int** blockids) { + return GetComponentImages(RIL_TEXTLINE, false, pixa, blockids); +} + +/** + * Get the words as a leptonica-style + * Boxa, Pixa pair, in reading order. + * Can be called before or after Recognize. + */ +Boxa* TessBaseAPI::GetWords(Pixa** pixa) { + return GetComponentImages(RIL_WORD, true, pixa, nullptr); +} + +/** + * Gets the individual connected (text) components (created + * after pages segmentation step, but before recognition) + * as a leptonica-style Boxa, Pixa pair, in reading order. + * Can be called before or after Recognize. + */ +Boxa* TessBaseAPI::GetConnectedComponents(Pixa** pixa) { + return GetComponentImages(RIL_SYMBOL, true, pixa, nullptr); +} + +/** + * Get the given level kind of components (block, textline, word etc.) as a + * leptonica-style Boxa, Pixa pair, in reading order. + * Can be called before or after Recognize. + * If blockids is not nullptr, the block-id of each component is also returned + * as an array of one element per component. delete [] after use. + * If text_only is true, then only text components are returned. 
+ */ +Boxa* TessBaseAPI::GetComponentImages(PageIteratorLevel level, + bool text_only, bool raw_image, + const int raw_padding, + Pixa** pixa, int** blockids, + int** paraids) { + PageIterator* page_it = GetIterator(); + if (page_it == nullptr) + page_it = AnalyseLayout(); + if (page_it == nullptr) + return nullptr; // Failed. + + // Count the components to get a size for the arrays. + int component_count = 0; + int left, top, right, bottom; + + TessResultCallback* get_bbox = nullptr; + if (raw_image) { + // Get bounding box in original raw image with padding. + get_bbox = NewPermanentTessCallback(page_it, &PageIterator::BoundingBox, + level, raw_padding, + &left, &top, &right, &bottom); + } else { + // Get bounding box from binarized imaged. Note that this could be + // differently scaled from the original image. + get_bbox = NewPermanentTessCallback(page_it, + &PageIterator::BoundingBoxInternal, + level, &left, &top, &right, &bottom); + } + do { + if (get_bbox->Run() && + (!text_only || PTIsTextType(page_it->BlockType()))) + ++component_count; + } while (page_it->Next(level)); + + Boxa* boxa = boxaCreate(component_count); + if (pixa != nullptr) + *pixa = pixaCreate(component_count); + if (blockids != nullptr) + *blockids = new int[component_count]; + if (paraids != nullptr) + *paraids = new int[component_count]; + + int blockid = 0; + int paraid = 0; + int component_index = 0; + page_it->Begin(); + do { + if (get_bbox->Run() && + (!text_only || PTIsTextType(page_it->BlockType()))) { + Box* lbox = boxCreate(left, top, right - left, bottom - top); + boxaAddBox(boxa, lbox, L_INSERT); + if (pixa != nullptr) { + Pix* pix = nullptr; + if (raw_image) { + pix = page_it->GetImage(level, raw_padding, GetInputImage(), &left, + &top); + } else { + pix = page_it->GetBinaryImage(level); + } + pixaAddPix(*pixa, pix, L_INSERT); + pixaAddBox(*pixa, lbox, L_CLONE); + } + if (paraids != nullptr) { + (*paraids)[component_index] = paraid; + if (page_it->IsAtFinalElement(RIL_PARA, 
level)) + ++paraid; + } + if (blockids != nullptr) { + (*blockids)[component_index] = blockid; + if (page_it->IsAtFinalElement(RIL_BLOCK, level)) { + ++blockid; + paraid = 0; + } + } + ++component_index; + } + } while (page_it->Next(level)); + delete page_it; + delete get_bbox; + return boxa; +} + +int TessBaseAPI::GetThresholdedImageScaleFactor() const { + if (thresholder_ == nullptr) { + return 0; + } + return thresholder_->GetScaleFactor(); +} + +/** + * Runs page layout analysis in the mode set by SetPageSegMode. + * May optionally be called prior to Recognize to get access to just + * the page layout results. Returns an iterator to the results. + * If merge_similar_words is true, words are combined where suitable for use + * with a line recognizer. Use if you want to use AnalyseLayout to find the + * textlines, and then want to process textline fragments with an external + * line recognizer. + * Returns nullptr on error or an empty page. + * The returned iterator must be deleted after use. + * WARNING! This class points to data held within the TessBaseAPI class, and + * therefore can only be used while the TessBaseAPI class still exists and + * has not been subjected to a call of Init, SetImage, Recognize, Clear, End + * DetectOS, or anything else that changes the internal PAGE_RES. + */ +PageIterator* TessBaseAPI::AnalyseLayout() { return AnalyseLayout(false); } + +PageIterator* TessBaseAPI::AnalyseLayout(bool merge_similar_words) { + if (FindLines() == 0) { + if (block_list_->empty()) + return nullptr; // The page was empty. + page_res_ = new PAGE_RES(merge_similar_words, block_list_, nullptr); + DetectParagraphs(false); + return new PageIterator( + page_res_, tesseract_, thresholder_->GetScaleFactor(), + thresholder_->GetScaledYResolution(), + rect_left_, rect_top_, rect_width_, rect_height_); + } + return nullptr; +} + +/** + * Recognize the tesseract global image and return the result as Tesseract + * internal structures. 
+ */ +int TessBaseAPI::Recognize(ETEXT_DESC* monitor) { + if (tesseract_ == nullptr) + return -1; + if (FindLines() != 0) + return -1; + delete page_res_; + if (block_list_->empty()) { + page_res_ = new PAGE_RES(false, block_list_, + &tesseract_->prev_word_best_choice_); + return 0; // Empty page. + } + + tesseract_->SetBlackAndWhitelist(); + recognition_done_ = true; +#ifndef DISABLED_LEGACY_ENGINE + if (tesseract_->tessedit_resegment_from_line_boxes) { + page_res_ = tesseract_->ApplyBoxes(*input_file_, true, block_list_); + } else if (tesseract_->tessedit_resegment_from_boxes) { + page_res_ = tesseract_->ApplyBoxes(*input_file_, false, block_list_); + } else +#endif // ndef DISABLED_LEGACY_ENGINE + { + page_res_ = new PAGE_RES(tesseract_->AnyLSTMLang(), + block_list_, &tesseract_->prev_word_best_choice_); + } + + if (page_res_ == nullptr) { + return -1; + } + + if (tesseract_->tessedit_train_line_recognizer) { + if (!tesseract_->TrainLineRecognizer(*input_file_, *output_file_, block_list_)) { + return -1; + } + tesseract_->CorrectClassifyWords(page_res_); + return 0; + } +#ifndef DISABLED_LEGACY_ENGINE + if (tesseract_->tessedit_make_boxes_from_boxes) { + tesseract_->CorrectClassifyWords(page_res_); + return 0; + } +#endif // ndef DISABLED_LEGACY_ENGINE + + if (truth_cb_ != nullptr) { + tesseract_->wordrec_run_blamer.set_value(true); + auto *page_it = new PageIterator( + page_res_, tesseract_, thresholder_->GetScaleFactor(), + thresholder_->GetScaledYResolution(), + rect_left_, rect_top_, rect_width_, rect_height_); + truth_cb_->Run(tesseract_->getDict().getUnicharset(), + image_height_, page_it, this->tesseract()->pix_grey()); + delete page_it; + } + + int result = 0; + if (tesseract_->interactive_display_mode) { + #ifndef GRAPHICS_DISABLED + tesseract_->pgeditor_main(rect_width_, rect_height_, page_res_); + #endif // GRAPHICS_DISABLED + // The page_res is invalid after an interactive session, so cleanup + // in a way that lets us continue to the next page 
without crashing. + delete page_res_; + page_res_ = nullptr; + return -1; + #ifndef DISABLED_LEGACY_ENGINE + } else if (tesseract_->tessedit_train_from_boxes) { + STRING fontname; + ExtractFontName(*output_file_, &fontname); + tesseract_->ApplyBoxTraining(fontname, page_res_); + } else if (tesseract_->tessedit_ambigs_training) { + FILE *training_output_file = tesseract_->init_recog_training(*input_file_); + // OCR the page segmented into words by tesseract. + tesseract_->recog_training_segmented( + *input_file_, page_res_, monitor, training_output_file); + fclose(training_output_file); + #endif // ndef DISABLED_LEGACY_ENGINE + } else { + // Now run the main recognition. + bool wait_for_text = true; + GetBoolVariable("paragraph_text_based", &wait_for_text); + if (!wait_for_text) DetectParagraphs(false); + if (tesseract_->recog_all_words(page_res_, monitor, nullptr, nullptr, 0)) { + if (wait_for_text) DetectParagraphs(true); + } else { + result = -1; + } + } + return result; +} + +#ifndef DISABLED_LEGACY_ENGINE +/** Tests the chopper by exhaustively running chop_one_blob. 
*/ +int TessBaseAPI::RecognizeForChopTest(ETEXT_DESC* monitor) { + if (tesseract_ == nullptr) + return -1; + if (thresholder_ == nullptr || thresholder_->IsEmpty()) { + tprintf("Please call SetImage before attempting recognition.\n"); + return -1; + } + if (page_res_ != nullptr) + ClearResults(); + if (FindLines() != 0) + return -1; + // Additional conditions under which chopper test cannot be run + if (tesseract_->interactive_display_mode) return -1; + + recognition_done_ = true; + + page_res_ = new PAGE_RES(false, block_list_, + &(tesseract_->prev_word_best_choice_)); + + PAGE_RES_IT page_res_it(page_res_); + + while (page_res_it.word() != nullptr) { + WERD_RES *word_res = page_res_it.word(); + GenericVector boxes; + tesseract_->MaximallyChopWord(boxes, page_res_it.block()->block, + page_res_it.row()->row, word_res); + page_res_it.forward(); + } + return 0; +} +#endif // ndef DISABLED_LEGACY_ENGINE + +// Takes ownership of the input pix. +void TessBaseAPI::SetInputImage(Pix* pix) { tesseract_->set_pix_original(pix); } + +Pix* TessBaseAPI::GetInputImage() { return tesseract_->pix_original(); } + +const char * TessBaseAPI::GetInputName() { + if (input_file_) + return input_file_->c_str(); + return nullptr; +} + +const char * TessBaseAPI::GetDatapath() { + return tesseract_->datadir.c_str(); +} + +int TessBaseAPI::GetSourceYResolution() { + return thresholder_->GetSourceYResolution(); +} + +// If flist exists, get data from there. Otherwise get data from buf. +// Seems convoluted, but is the easiest way I know of to meet multiple +// goals. Support streaming from stdin, and also work on platforms +// lacking fmemopen. +bool TessBaseAPI::ProcessPagesFileList(FILE *flist, + STRING *buf, + const char* retry_config, + int timeout_millisec, + TessResultRenderer* renderer, + int tessedit_page_number) { + if (!flist && !buf) return false; + int page = (tessedit_page_number >= 0) ? 
tessedit_page_number : 0; + char pagename[MAX_PATH]; + + GenericVector lines; + if (!flist) { + buf->split('\n', &lines); + if (lines.empty()) return false; + } + + // Skip to the requested page number. + for (int i = 0; i < page; i++) { + if (flist) { + if (fgets(pagename, sizeof(pagename), flist) == nullptr) break; + } + } + + // Begin producing output + if (renderer && !renderer->BeginDocument(document_title.c_str())) { + return false; + } + + // Loop over all pages - or just the requested one + while (true) { + if (flist) { + if (fgets(pagename, sizeof(pagename), flist) == nullptr) break; + } else { + if (page >= lines.size()) break; + snprintf(pagename, sizeof(pagename), "%s", lines[page].c_str()); + } + chomp_string(pagename); + Pix *pix = pixRead(pagename); + if (pix == nullptr) { + tprintf("Image file %s cannot be read!\n", pagename); + return false; + } + tprintf("Page %d : %s\n", page, pagename); + bool r = ProcessPage(pix, page, pagename, retry_config, + timeout_millisec, renderer); + pixDestroy(&pix); + if (!r) return false; + if (tessedit_page_number >= 0) break; + ++page; + } + + // Finish producing output + if (renderer && !renderer->EndDocument()) { + return false; + } + return true; +} + +bool TessBaseAPI::ProcessPagesMultipageTiff(const l_uint8 *data, + size_t size, + const char* filename, + const char* retry_config, + int timeout_millisec, + TessResultRenderer* renderer, + int tessedit_page_number) { +#ifndef ANDROID_BUILD + Pix *pix = nullptr; + int page = (tessedit_page_number >= 0) ? tessedit_page_number : 0; + size_t offset = 0; + for (; ; ++page) { + if (tessedit_page_number >= 0) { + page = tessedit_page_number; + pix = (data) ? pixReadMemTiff(data, size, page) + : pixReadTiff(filename, page); + } else { + pix = (data) ? 
pixReadMemFromMultipageTiff(data, size, &offset) + : pixReadFromMultipageTiff(filename, &offset); + } + if (pix == nullptr) break; + tprintf("Page %d\n", page + 1); + char page_str[kMaxIntSize]; + snprintf(page_str, kMaxIntSize - 1, "%d", page); + SetVariable("applybox_page", page_str); + bool r = ProcessPage(pix, page, filename, retry_config, + timeout_millisec, renderer); + pixDestroy(&pix); + if (!r) return false; + if (tessedit_page_number >= 0) break; + if (!offset) break; + } + return true; +#else + return false; +#endif +} + +// Master ProcessPages calls ProcessPagesInternal and then does any post- +// processing required due to being in a training mode. +bool TessBaseAPI::ProcessPages(const char* filename, const char* retry_config, + int timeout_millisec, + TessResultRenderer* renderer) { + bool result = + ProcessPagesInternal(filename, retry_config, timeout_millisec, renderer); + #ifndef DISABLED_LEGACY_ENGINE + if (result) { + if (tesseract_->tessedit_train_from_boxes && + !tesseract_->WriteTRFile(*output_file_)) { + tprintf("Write of TR file failed: %s\n", output_file_->string()); + return false; + } + } + #endif // ndef DISABLED_LEGACY_ENGINE + return result; +} + +static size_t +WriteMemoryCallback(void *contents, size_t size, size_t nmemb, void *userp) +{ + size = size * nmemb; + std::string* buf = reinterpret_cast(userp); + buf->append(reinterpret_cast(contents), size); + return size; +} + +// In the ideal scenario, Tesseract will start working on data as soon +// as it can. For example, if you stream a filelist through stdin, we +// should start the OCR process as soon as the first filename is +// available. This is particularly useful when hooking Tesseract up to +// slow hardware such as a book scanning machine. +// +// Unfortunately there are tradeoffs. You can't seek on stdin. That +// makes automatic detection of datatype (TIFF? filelist? PNG?) +// impractical. 
So we support a command line flag to explicitly +// identify the scenario that really matters: filelists on +// stdin. We'll still do our best if the user likes pipes. +bool TessBaseAPI::ProcessPagesInternal(const char* filename, + const char* retry_config, + int timeout_millisec, + TessResultRenderer* renderer) { + bool stdInput = !strcmp(filename, "stdin") || !strcmp(filename, "-"); + if (stdInput) { +#ifdef WIN32 + if (_setmode(_fileno(stdin), _O_BINARY) == -1) + tprintf("ERROR: cin to binary: %s", strerror(errno)); +#endif // WIN32 + } + + if (stream_filelist) { + return ProcessPagesFileList(stdin, nullptr, retry_config, + timeout_millisec, renderer, + tesseract_->tessedit_page_number); + } + + // At this point we are officially in autodection territory. + // That means any data in stdin must be buffered, to make it + // seekable. + std::string buf; + const l_uint8 *data = nullptr; + if (stdInput) { + buf.assign((std::istreambuf_iterator(std::cin)), + (std::istreambuf_iterator())); + data = reinterpret_cast(buf.data()); + } else if (strncmp(filename, "http:", 5) == 0 || + strncmp(filename, "https:", 6) == 0 ) { + // Get image or image list by URL. +#ifdef HAVE_LIBCURL + CURL* curl = curl_easy_init(); + if (curl == nullptr) { + fprintf(stderr, "Error, curl_easy_init failed\n"); + return false; + } else { + CURLcode curlcode; + curlcode = curl_easy_setopt(curl, CURLOPT_URL, filename); + ASSERT_HOST(curlcode == CURLE_OK); + curlcode = curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteMemoryCallback); + ASSERT_HOST(curlcode == CURLE_OK); + curlcode = curl_easy_setopt(curl, CURLOPT_WRITEDATA, &buf); + ASSERT_HOST(curlcode == CURLE_OK); + curlcode = curl_easy_perform(curl); + ASSERT_HOST(curlcode == CURLE_OK); + curl_easy_cleanup(curl); + data = reinterpret_cast(buf.data()); + } +#else + fprintf(stderr, "Error, this tesseract has no URL support\n"); + return false; +#endif + } else { + // Check whether the input file can be read. 
+ if (FILE* file = fopen(filename, "rb")) { + fclose(file); + } else { + fprintf(stderr, "Error, cannot read input file %s: %s\n", + filename, strerror(errno)); + return false; + } + } + + // Here is our autodetection + int format; + int r = (data != nullptr) ? + findFileFormatBuffer(data, &format) : + findFileFormat(filename, &format); + + // Maybe we have a filelist + if (r != 0 || format == IFF_UNKNOWN) { + STRING s; + if (data != nullptr) { + s = buf.c_str(); + } else { + std::ifstream t(filename); + std::string u((std::istreambuf_iterator(t)), + std::istreambuf_iterator()); + s = u.c_str(); + } + return ProcessPagesFileList(nullptr, &s, retry_config, + timeout_millisec, renderer, + tesseract_->tessedit_page_number); + } + + // Maybe we have a TIFF which is potentially multipage + bool tiff = (format == IFF_TIFF || format == IFF_TIFF_PACKBITS || + format == IFF_TIFF_RLE || format == IFF_TIFF_G3 || + format == IFF_TIFF_G4 || format == IFF_TIFF_LZW || +#if LIBLEPT_MAJOR_VERSION > 1 || LIBLEPT_MINOR_VERSION > 76 + format == IFF_TIFF_JPEG || +#endif + format == IFF_TIFF_ZIP); + + // Fail early if we can, before producing any output + Pix *pix = nullptr; + if (!tiff) { + pix = (data != nullptr) ? pixReadMem(data, buf.size()) : pixRead(filename); + if (pix == nullptr) { + return false; + } + } + + // Begin the output + if (renderer && !renderer->BeginDocument(document_title.c_str())) { + pixDestroy(&pix); + return false; + } + + // Produce output + r = (tiff) ? 
+ ProcessPagesMultipageTiff(data, buf.size(), filename, retry_config, + timeout_millisec, renderer, + tesseract_->tessedit_page_number) : + ProcessPage(pix, 0, filename, retry_config, + timeout_millisec, renderer); + + // Clean up memory as needed + pixDestroy(&pix); + + // End the output + if (!r || (renderer && !renderer->EndDocument())) { + return false; + } + return true; +} + +bool TessBaseAPI::ProcessPage(Pix* pix, int page_index, const char* filename, + const char* retry_config, int timeout_millisec, + TessResultRenderer* renderer) { + SetInputName(filename); + SetImage(pix); + bool failed = false; + + if (tesseract_->tessedit_pageseg_mode == PSM_AUTO_ONLY) { + // Disabled character recognition + PageIterator* it = AnalyseLayout(); + + if (it == nullptr) { + failed = true; + } else { + delete it; + } + } else if (tesseract_->tessedit_pageseg_mode == PSM_OSD_ONLY) { + failed = FindLines() != 0; + } else if (timeout_millisec > 0) { + // Running with a timeout. + ETEXT_DESC monitor; + monitor.cancel = nullptr; + monitor.cancel_this = nullptr; + monitor.set_deadline_msecs(timeout_millisec); + + // Now run the main recognition. + failed = Recognize(&monitor) < 0; + } else { + // Normal layout and character recognition with no timeout. + failed = Recognize(nullptr) < 0; + } + + if (tesseract_->tessedit_write_images) { +#ifndef ANDROID_BUILD + Pix* page_pix = GetThresholdedImage(); + pixWrite("tessinput.tif", page_pix, IFF_TIFF_G4); +#endif // ANDROID_BUILD + } + + if (failed && retry_config != nullptr && retry_config[0] != '\0') { + // Save current config variables before switching modes. + FILE* fp = fopen(kOldVarsFile, "wb"); + if (fp == nullptr) { + tprintf("Error, failed to open file \"%s\"\n", kOldVarsFile); + } else { + PrintVariables(fp); + fclose(fp); + } + // Switch to alternate mode for retry. + ReadConfigFile(retry_config); + SetImage(pix); + Recognize(nullptr); + // Restore saved config variables. 
+ ReadConfigFile(kOldVarsFile); + } + + if (renderer && !failed) { + failed = !renderer->AddImage(this); + } + + return !failed; +} + +/** + * Get a left-to-right iterator to the results of LayoutAnalysis and/or + * Recognize. The returned iterator must be deleted after use. + */ +LTRResultIterator* TessBaseAPI::GetLTRIterator() { + if (tesseract_ == nullptr || page_res_ == nullptr) + return nullptr; + return new LTRResultIterator( + page_res_, tesseract_, + thresholder_->GetScaleFactor(), thresholder_->GetScaledYResolution(), + rect_left_, rect_top_, rect_width_, rect_height_); +} + +/** + * Get a reading-order iterator to the results of LayoutAnalysis and/or + * Recognize. The returned iterator must be deleted after use. + * WARNING! This class points to data held within the TessBaseAPI class, and + * therefore can only be used while the TessBaseAPI class still exists and + * has not been subjected to a call of Init, SetImage, Recognize, Clear, End + * DetectOS, or anything else that changes the internal PAGE_RES. + */ +ResultIterator* TessBaseAPI::GetIterator() { + if (tesseract_ == nullptr || page_res_ == nullptr) + return nullptr; + return ResultIterator::StartOfParagraph(LTRResultIterator( + page_res_, tesseract_, + thresholder_->GetScaleFactor(), thresholder_->GetScaledYResolution(), + rect_left_, rect_top_, rect_width_, rect_height_)); +} + +/** + * Get a mutable iterator to the results of LayoutAnalysis and/or Recognize. + * The returned iterator must be deleted after use. + * WARNING! This class points to data held within the TessBaseAPI class, and + * therefore can only be used while the TessBaseAPI class still exists and + * has not been subjected to a call of Init, SetImage, Recognize, Clear, End + * DetectOS, or anything else that changes the internal PAGE_RES. 
+ */ +MutableIterator* TessBaseAPI::GetMutableIterator() { + if (tesseract_ == nullptr || page_res_ == nullptr) + return nullptr; + return new MutableIterator(page_res_, tesseract_, + thresholder_->GetScaleFactor(), + thresholder_->GetScaledYResolution(), + rect_left_, rect_top_, rect_width_, rect_height_); +} + +/** Make a text string from the internal data structures. */ +char* TessBaseAPI::GetUTF8Text() { + if (tesseract_ == nullptr || + (!recognition_done_ && Recognize(nullptr) < 0)) + return nullptr; + STRING text(""); + ResultIterator *it = GetIterator(); + do { + if (it->Empty(RIL_PARA)) continue; + const std::unique_ptr para_text(it->GetUTF8Text(RIL_PARA)); + text += para_text.get(); + } while (it->Next(RIL_PARA)); + char* result = new char[text.length() + 1]; + strncpy(result, text.string(), text.length() + 1); + delete it; + return result; +} + +static void AddBoxToTSV(const PageIterator* it, PageIteratorLevel level, + STRING* text) { + int left, top, right, bottom; + it->BoundingBox(level, &left, &top, &right, &bottom); + text->add_str_int("\t", left); + text->add_str_int("\t", top); + text->add_str_int("\t", right - left); + text->add_str_int("\t", bottom - top); +} + +/** + * Make a TSV-formatted string from the internal data structures. + * page_number is 0-based but will appear in the output as 1-based. + * Returned string must be freed with the delete [] operator. + */ +char* TessBaseAPI::GetTSVText(int page_number) { + if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(nullptr) < 0)) + return nullptr; + + int lcnt = 1, bcnt = 1, pcnt = 1, wcnt = 1; + int page_id = page_number + 1; // we use 1-based page numbers. 
+ + STRING tsv_str(""); + + int page_num = page_id; + int block_num = 0; + int par_num = 0; + int line_num = 0; + int word_num = 0; + + tsv_str.add_str_int("1\t", page_num); // level 1 - page + tsv_str.add_str_int("\t", block_num); + tsv_str.add_str_int("\t", par_num); + tsv_str.add_str_int("\t", line_num); + tsv_str.add_str_int("\t", word_num); + tsv_str.add_str_int("\t", rect_left_); + tsv_str.add_str_int("\t", rect_top_); + tsv_str.add_str_int("\t", rect_width_); + tsv_str.add_str_int("\t", rect_height_); + tsv_str += "\t-1\t\n"; + + ResultIterator* res_it = GetIterator(); + while (!res_it->Empty(RIL_BLOCK)) { + if (res_it->Empty(RIL_WORD)) { + res_it->Next(RIL_WORD); + continue; + } + + // Add rows for any new block/paragraph/textline. + if (res_it->IsAtBeginningOf(RIL_BLOCK)) { + block_num++; + par_num = 0; + line_num = 0; + word_num = 0; + tsv_str.add_str_int("2\t", page_num); // level 2 - block + tsv_str.add_str_int("\t", block_num); + tsv_str.add_str_int("\t", par_num); + tsv_str.add_str_int("\t", line_num); + tsv_str.add_str_int("\t", word_num); + AddBoxToTSV(res_it, RIL_BLOCK, &tsv_str); + tsv_str += "\t-1\t\n"; // end of row for block + } + if (res_it->IsAtBeginningOf(RIL_PARA)) { + par_num++; + line_num = 0; + word_num = 0; + tsv_str.add_str_int("3\t", page_num); // level 3 - paragraph + tsv_str.add_str_int("\t", block_num); + tsv_str.add_str_int("\t", par_num); + tsv_str.add_str_int("\t", line_num); + tsv_str.add_str_int("\t", word_num); + AddBoxToTSV(res_it, RIL_PARA, &tsv_str); + tsv_str += "\t-1\t\n"; // end of row for para + } + if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) { + line_num++; + word_num = 0; + tsv_str.add_str_int("4\t", page_num); // level 4 - line + tsv_str.add_str_int("\t", block_num); + tsv_str.add_str_int("\t", par_num); + tsv_str.add_str_int("\t", line_num); + tsv_str.add_str_int("\t", word_num); + AddBoxToTSV(res_it, RIL_TEXTLINE, &tsv_str); + tsv_str += "\t-1\t\n"; // end of row for line + } + + // Now, process the word... 
+ int left, top, right, bottom; + res_it->BoundingBox(RIL_WORD, &left, &top, &right, &bottom); + word_num++; + tsv_str.add_str_int("5\t", page_num); // level 5 - word + tsv_str.add_str_int("\t", block_num); + tsv_str.add_str_int("\t", par_num); + tsv_str.add_str_int("\t", line_num); + tsv_str.add_str_int("\t", word_num); + tsv_str.add_str_int("\t", left); + tsv_str.add_str_int("\t", top); + tsv_str.add_str_int("\t", right - left); + tsv_str.add_str_int("\t", bottom - top); + tsv_str.add_str_int("\t", res_it->Confidence(RIL_WORD)); + tsv_str += "\t"; + + // Increment counts if at end of block/paragraph/textline. + if (res_it->IsAtFinalElement(RIL_TEXTLINE, RIL_WORD)) lcnt++; + if (res_it->IsAtFinalElement(RIL_PARA, RIL_WORD)) pcnt++; + if (res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD)) bcnt++; + + do { + tsv_str += + std::unique_ptr(res_it->GetUTF8Text(RIL_SYMBOL)).get(); + res_it->Next(RIL_SYMBOL); + } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD)); + tsv_str += "\n"; // end of row + wcnt++; + } + + char* ret = new char[tsv_str.length() + 1]; + strcpy(ret, tsv_str.string()); + delete res_it; + return ret; +} + +/** The 5 numbers output for each box (the usual 4 and a page number.) */ +const int kNumbersPerBlob = 5; +/** + * The number of bytes taken by each number. Since we use int16_t for ICOORD, + * assume only 5 digits max. + */ +const int kBytesPerNumber = 5; +/** + * Multiplier for max expected textlength assumes (kBytesPerNumber + space) + * * kNumbersPerBlob plus the newline. Add to this the + * original UTF8 characters, and one kMaxBytesPerLine for safety. + */ +const int kBytesPerBoxFileLine = (kBytesPerNumber + 1) * kNumbersPerBlob + 1; +/** Max bytes in the decimal representation of int64_t. 
*/ +const int kBytesPer64BitNumber = 20; +/** + * A maximal single box could occupy kNumbersPerBlob numbers at + * kBytesPer64BitNumber digits (if someone sneaks in a 64 bit value) and a + * space plus the newline and the maximum length of a UNICHAR. + * Test against this on each iteration for safety. + */ +const int kMaxBytesPerLine = kNumbersPerBlob * (kBytesPer64BitNumber + 1) + 1 + + UNICHAR_LEN; + +/** + * The recognized text is returned as a char* which is coded + * as a UTF8 box file. + * page_number is a 0-base page index that will appear in the box file. + * Returned string must be freed with the delete [] operator. + */ +char* TessBaseAPI::GetBoxText(int page_number) { + if (tesseract_ == nullptr || + (!recognition_done_ && Recognize(nullptr) < 0)) + return nullptr; + int blob_count; + int utf8_length = TextLength(&blob_count); + int total_length = blob_count * kBytesPerBoxFileLine + utf8_length + + kMaxBytesPerLine; + char* result = new char[total_length]; + result[0] = '\0'; + int output_length = 0; + LTRResultIterator* it = GetLTRIterator(); + do { + int left, top, right, bottom; + if (it->BoundingBox(RIL_SYMBOL, &left, &top, &right, &bottom)) { + const std::unique_ptr text( + it->GetUTF8Text(RIL_SYMBOL)); + // Tesseract uses space for recognition failure. Fix to a reject + // character, kTesseractReject so we don't create illegal box files. + for (int i = 0; text[i] != '\0'; ++i) { + if (text[i] == ' ') + text[i] = kTesseractReject; + } + snprintf(result + output_length, total_length - output_length, + "%s %d %d %d %d %d\n", text.get(), left, image_height_ - bottom, + right, image_height_ - top, page_number); + output_length += strlen(result + output_length); + // Just in case... + if (output_length + kMaxBytesPerLine > total_length) + break; + } + } while (it->Next(RIL_SYMBOL)); + delete it; + return result; +} + +/** + * Conversion table for non-latin characters. + * Maps characters out of the latin set into the latin set. 
+ * TODO(rays) incorporate this translation into unicharset. + */ +const int kUniChs[] = { + 0x20ac, 0x201c, 0x201d, 0x2018, 0x2019, 0x2022, 0x2014, 0 +}; +/** Latin chars corresponding to the unicode chars above. */ +const int kLatinChs[] = { + 0x00a2, 0x0022, 0x0022, 0x0027, 0x0027, 0x00b7, 0x002d, 0 +}; + +/** + * The recognized text is returned as a char* which is coded + * as UNLV format Latin-1 with specific reject and suspect codes. + * Returned string must be freed with the delete [] operator. + */ +char* TessBaseAPI::GetUNLVText() { + if (tesseract_ == nullptr || + (!recognition_done_ && Recognize(nullptr) < 0)) + return nullptr; + bool tilde_crunch_written = false; + bool last_char_was_newline = true; + bool last_char_was_tilde = false; + + int total_length = TextLength(nullptr); + PAGE_RES_IT page_res_it(page_res_); + char* result = new char[total_length]; + char* ptr = result; + for (page_res_it.restart_page(); page_res_it.word () != nullptr; + page_res_it.forward()) { + WERD_RES *word = page_res_it.word(); + // Process the current word. + if (word->unlv_crunch_mode != CR_NONE) { + if (word->unlv_crunch_mode != CR_DELETE && + (!tilde_crunch_written || + (word->unlv_crunch_mode == CR_KEEP_SPACE && + word->word->space() > 0 && + !word->word->flag(W_FUZZY_NON) && + !word->word->flag(W_FUZZY_SP)))) { + if (!word->word->flag(W_BOL) && + word->word->space() > 0 && + !word->word->flag(W_FUZZY_NON) && + !word->word->flag(W_FUZZY_SP)) { + /* Write a space to separate from preceding good text */ + *ptr++ = ' '; + last_char_was_tilde = false; + } + if (!last_char_was_tilde) { + // Write a reject char. + last_char_was_tilde = true; + *ptr++ = kUNLVReject; + tilde_crunch_written = true; + last_char_was_newline = false; + } + } + } else { + // NORMAL PROCESSING of non tilde crunched words. 
+ tilde_crunch_written = false; + tesseract_->set_unlv_suspects(word); + const char* wordstr = word->best_choice->unichar_string().string(); + const STRING& lengths = word->best_choice->unichar_lengths(); + int length = lengths.length(); + int i = 0; + int offset = 0; + + if (last_char_was_tilde && + word->word->space() == 0 && wordstr[offset] == ' ') { + // Prevent adjacent tilde across words - we know that adjacent tildes + // within words have been removed. + // Skip the first character. + offset = lengths[i++]; + } + if (i < length && wordstr[offset] != 0) { + if (!last_char_was_newline) + *ptr++ = ' '; + else + last_char_was_newline = false; + for (; i < length; offset += lengths[i++]) { + if (wordstr[offset] == ' ' || + wordstr[offset] == kTesseractReject) { + *ptr++ = kUNLVReject; + last_char_was_tilde = true; + } else { + if (word->reject_map[i].rejected()) + *ptr++ = kUNLVSuspect; + UNICHAR ch(wordstr + offset, lengths[i]); + int uni_ch = ch.first_uni(); + for (int j = 0; kUniChs[j] != 0; ++j) { + if (kUniChs[j] == uni_ch) { + uni_ch = kLatinChs[j]; + break; + } + } + if (uni_ch <= 0xff) { + *ptr++ = static_cast(uni_ch); + last_char_was_tilde = false; + } else { + *ptr++ = kUNLVReject; + last_char_was_tilde = true; + } + } + } + } + } + if (word->word->flag(W_EOL) && !last_char_was_newline) { + /* Add a new line output */ + *ptr++ = '\n'; + tilde_crunch_written = false; + last_char_was_newline = true; + last_char_was_tilde = false; + } + } + *ptr++ = '\n'; + *ptr = '\0'; + return result; +} + +#ifndef DISABLED_LEGACY_ENGINE + +/** + * Detect the orientation of the input image and apparent script (alphabet). + * orient_deg is the detected clockwise rotation of the input image in degrees + * (0, 90, 180, 270) + * orient_conf is the confidence (15.0 is reasonably confident) + * script_name is an ASCII string, the name of the script, e.g. 
"Latin" + * script_conf is confidence level in the script + * Returns true on success and writes values to each parameter as an output + */ +bool TessBaseAPI::DetectOrientationScript(int* orient_deg, float* orient_conf, + const char** script_name, + float* script_conf) { + OSResults osr; + + bool osd = DetectOS(&osr); + if (!osd) { + return false; + } + + int orient_id = osr.best_result.orientation_id; + int script_id = osr.get_best_script(orient_id); + if (orient_conf) *orient_conf = osr.best_result.oconfidence; + if (orient_deg) *orient_deg = orient_id * 90; // convert quadrant to degrees + + if (script_name) { + const char* script = osr.unicharset->get_script_from_script_id(script_id); + + *script_name = script; + } + + if (script_conf) *script_conf = osr.best_result.sconfidence; + + return true; +} + +/** + * The recognized text is returned as a char* which is coded + * as UTF8 and must be freed with the delete [] operator. + * page_number is a 0-based page index that will appear in the osd file. + */ +char* TessBaseAPI::GetOsdText(int page_number) { + int orient_deg; + float orient_conf; + const char* script_name; + float script_conf; + + if (!DetectOrientationScript(&orient_deg, &orient_conf, &script_name, + &script_conf)) + return nullptr; + + // clockwise rotation needed to make the page upright + int rotate = OrientationIdToValue(orient_deg / 90); + + std::stringstream stream; + // Use "C" locale (needed for float values orient_conf and script_conf). + stream.imbue(std::locale::classic()); + // Use fixed notation with 2 digits after the decimal point for float values. 
+ stream.precision(2); + stream + << std::fixed + << "Page number: " << page_number << "\n" + << "Orientation in degrees: " << orient_deg << "\n" + << "Rotate: " << rotate << "\n" + << "Orientation confidence: " << orient_conf << "\n" + << "Script: " << script_name << "\n" + << "Script confidence: " << script_conf << "\n"; + const std::string& text = stream.str(); + char* result = new char[text.length() + 1]; + strcpy(result, text.c_str()); + return result; +} + +#endif // ndef DISABLED_LEGACY_ENGINE + +/** Returns the average word confidence for Tesseract page result. */ +int TessBaseAPI::MeanTextConf() { + int* conf = AllWordConfidences(); + if (!conf) return 0; + int sum = 0; + int *pt = conf; + while (*pt >= 0) sum += *pt++; + if (pt != conf) sum /= pt - conf; + delete [] conf; + return sum; +} + +/** Returns an array of all word confidences, terminated by -1. */ +int* TessBaseAPI::AllWordConfidences() { + if (tesseract_ == nullptr || + (!recognition_done_ && Recognize(nullptr) < 0)) + return nullptr; + int n_word = 0; + PAGE_RES_IT res_it(page_res_); + for (res_it.restart_page(); res_it.word() != nullptr; res_it.forward()) + n_word++; + + int* conf = new int[n_word+1]; + n_word = 0; + for (res_it.restart_page(); res_it.word() != nullptr; res_it.forward()) { + WERD_RES *word = res_it.word(); + WERD_CHOICE* choice = word->best_choice; + int w_conf = static_cast(100 + 5 * choice->certainty()); + // This is the eq for converting Tesseract confidence to 1..100 + if (w_conf < 0) w_conf = 0; + if (w_conf > 100) w_conf = 100; + conf[n_word++] = w_conf; + } + conf[n_word] = -1; + return conf; +} + +#ifndef DISABLED_LEGACY_ENGINE +/** + * Applies the given word to the adaptive classifier if possible. + * The word must be SPACE-DELIMITED UTF-8 - l i k e t h i s , so it can + * tell the boundaries of the graphemes. + * Assumes that SetImage/SetRectangle have been used to set the image + * to the given word. 
The mode arg should be PSM_SINGLE_WORD or + * PSM_CIRCLE_WORD, as that will be used to control layout analysis. + * The currently set PageSegMode is preserved. + * Returns false if adaption was not possible for some reason. + */ +bool TessBaseAPI::AdaptToWordStr(PageSegMode mode, const char* wordstr) { + int debug = 0; + GetIntVariable("applybox_debug", &debug); + bool success = true; + PageSegMode current_psm = GetPageSegMode(); + SetPageSegMode(mode); + SetVariable("classify_enable_learning", "0"); + const std::unique_ptr text(GetUTF8Text()); + if (debug) { + tprintf("Trying to adapt \"%s\" to \"%s\"\n", text.get(), wordstr); + } + if (text != nullptr) { + PAGE_RES_IT it(page_res_); + WERD_RES* word_res = it.word(); + if (word_res != nullptr) { + word_res->word->set_text(wordstr); + // Check to see if text matches wordstr. + int w = 0; + int t; + for (t = 0; text[t] != '\0'; ++t) { + if (text[t] == '\n' || text[t] == ' ') + continue; + while (wordstr[w] == ' ') ++w; + if (text[t] != wordstr[w]) + break; + ++w; + } + if (text[t] != '\0' || wordstr[w] != '\0') { + // No match. + delete page_res_; + GenericVector boxes; + page_res_ = tesseract_->SetupApplyBoxes(boxes, block_list_); + tesseract_->ReSegmentByClassification(page_res_); + tesseract_->TidyUp(page_res_); + PAGE_RES_IT pr_it(page_res_); + if (pr_it.word() == nullptr) + success = false; + else + word_res = pr_it.word(); + } else { + word_res->BestChoiceToCorrectText(); + } + if (success) { + tesseract_->EnableLearning = true; + tesseract_->LearnWord(nullptr, word_res); + } + } else { + success = false; + } + } else { + success = false; + } + SetPageSegMode(current_psm); + return success; +} +#endif // ndef DISABLED_LEGACY_ENGINE + +/** + * Free up recognition results and any stored image data, without actually + * freeing any recognition data that would be time-consuming to reload. + * Afterwards, you must call SetImage or TesseractRect before doing + * any Recognize or Get* operation. 
+ */ +void TessBaseAPI::Clear() { + if (thresholder_ != nullptr) + thresholder_->Clear(); + ClearResults(); + if (tesseract_ != nullptr) SetInputImage(nullptr); +} + +/** + * Close down tesseract and free up all memory. End() is equivalent to + * destructing and reconstructing your TessBaseAPI. + * Once End() has been used, none of the other API functions may be used + * other than Init and anything declared above it in the class definition. + */ +void TessBaseAPI::End() { + Clear(); + delete thresholder_; + thresholder_ = nullptr; + delete page_res_; + page_res_ = nullptr; + delete block_list_; + block_list_ = nullptr; + if (paragraph_models_ != nullptr) { + paragraph_models_->delete_data_pointers(); + delete paragraph_models_; + paragraph_models_ = nullptr; + } + if (osd_tesseract_ == tesseract_) osd_tesseract_ = nullptr; + delete tesseract_; + tesseract_ = nullptr; + delete osd_tesseract_; + osd_tesseract_ = nullptr; + delete equ_detect_; + equ_detect_ = nullptr; + delete input_file_; + input_file_ = nullptr; + delete output_file_; + output_file_ = nullptr; + delete datapath_; + datapath_ = nullptr; + delete language_; + language_ = nullptr; +} + +// Clear any library-level memory caches. +// There are a variety of expensive-to-load constant data structures (mostly +// language dictionaries) that are cached globally -- surviving the Init() +// and End() of individual TessBaseAPI's. This function allows the clearing +// of these caches. +void TessBaseAPI::ClearPersistentCache() { + Dict::GlobalDawgCache()->DeleteUnusedDawgs(); +} + +/** + * Check whether a word is valid according to Tesseract's language model + * returns 0 if the word is invalid, non-zero if valid + */ +int TessBaseAPI::IsValidWord(const char *word) { + return tesseract_->getDict().valid_word(word); +} +// Returns true if utf8_character is defined in the UniCharset. 
+bool TessBaseAPI::IsValidCharacter(const char *utf8_character) { + return tesseract_->unicharset.contains_unichar(utf8_character); +} + + +// TODO(rays) Obsolete this function and replace with a more aptly named +// function that returns image coordinates rather than tesseract coordinates. +bool TessBaseAPI::GetTextDirection(int* out_offset, float* out_slope) { + PageIterator* it = AnalyseLayout(); + if (it == nullptr) { + return false; + } + int x1, x2, y1, y2; + it->Baseline(RIL_TEXTLINE, &x1, &y1, &x2, &y2); + // Calculate offset and slope (NOTE: Kind of ugly) + if (x2 <= x1) x2 = x1 + 1; + // Convert the point pair to slope/offset of the baseline (in image coords.) + *out_slope = static_cast(y2 - y1) / (x2 - x1); + *out_offset = static_cast(y1 - *out_slope * x1); + // Get the y-coord of the baseline at the left and right edges of the + // textline's bounding box. + int left, top, right, bottom; + if (!it->BoundingBox(RIL_TEXTLINE, &left, &top, &right, &bottom)) { + delete it; + return false; + } + int left_y = IntCastRounded(*out_slope * left + *out_offset); + int right_y = IntCastRounded(*out_slope * right + *out_offset); + // Shift the baseline down so it passes through the nearest bottom-corner + // of the textline's bounding box. This is the difference between the y + // at the lowest (max) edge of the box and the actual box bottom. + *out_offset += bottom - std::max(left_y, right_y); + // Switch back to bottom-up tesseract coordinates. Requires negation of + // the slope and height - offset for the offset. + *out_slope = -*out_slope; + *out_offset = rect_height_ - *out_offset; + delete it; + + return true; +} + +/** Sets Dict::letter_is_okay_ function to point to the given function. */ +void TessBaseAPI::SetDictFunc(DictFunc f) { + if (tesseract_ != nullptr) { + tesseract_->getDict().letter_is_okay_ = f; + } +} + +/** + * Sets Dict::probability_in_context_ function to point to the given + * function. 
+ * + * @param f A single function that returns the probability of the current + * "character" (in general a utf-8 string), given the context of a previous + * utf-8 string. + */ +void TessBaseAPI::SetProbabilityInContextFunc(ProbabilityInContextFunc f) { + if (tesseract_ != nullptr) { + tesseract_->getDict().probability_in_context_ = f; + // Set it for the sublangs too. + int num_subs = tesseract_->num_sub_langs(); + for (int i = 0; i < num_subs; ++i) { + tesseract_->get_sub_lang(i)->getDict().probability_in_context_ = f; + } + } +} + +#ifndef DISABLED_LEGACY_ENGINE +/** Sets Wordrec::fill_lattice_ function to point to the given function. */ +void TessBaseAPI::SetFillLatticeFunc(FillLatticeFunc f) { + if (tesseract_ != nullptr) tesseract_->fill_lattice_ = f; +} +#endif // ndef DISABLED_LEGACY_ENGINE + +/** Common code for setting the image. */ +bool TessBaseAPI::InternalSetImage() { + if (tesseract_ == nullptr) { + tprintf("Please call Init before attempting to set an image.\n"); + return false; + } + if (thresholder_ == nullptr) + thresholder_ = new ImageThresholder; + ClearResults(); + return true; +} + +/** + * Run the thresholder to make the thresholded image, returned in pix, + * which must not be nullptr. *pix must be initialized to nullptr, or point + * to an existing pixDestroyable Pix. + * The usual argument to Threshold is Tesseract::mutable_pix_binary(). + */ +bool TessBaseAPI::Threshold(Pix** pix) { + ASSERT_HOST(pix != nullptr); + if (*pix != nullptr) + pixDestroy(pix); + // Zero resolution messes up the algorithms, so make sure it is credible. 
+ int user_dpi = 0; + GetIntVariable("user_defined_dpi", &user_dpi); + int y_res = thresholder_->GetScaledYResolution(); + if (user_dpi && (user_dpi < kMinCredibleResolution || + user_dpi > kMaxCredibleResolution)) { + tprintf("Warning: User defined image dpi is outside of expected range " + "(%d - %d)!\n", + kMinCredibleResolution, kMaxCredibleResolution); + } + // Always use user defined dpi + if (user_dpi) { + thresholder_->SetSourceYResolution(user_dpi); + } else if (y_res < kMinCredibleResolution || + y_res > kMaxCredibleResolution) { + tprintf("Warning: Invalid resolution %d dpi. Using %d instead.\n", + y_res, kMinCredibleResolution); + thresholder_->SetSourceYResolution(kMinCredibleResolution); + } + auto pageseg_mode = + static_cast( + static_cast(tesseract_->tessedit_pageseg_mode)); + if (!thresholder_->ThresholdToPix(pageseg_mode, pix)) return false; + thresholder_->GetImageSizes(&rect_left_, &rect_top_, + &rect_width_, &rect_height_, + &image_width_, &image_height_); + if (!thresholder_->IsBinary()) { + tesseract_->set_pix_thresholds(thresholder_->GetPixRectThresholds()); + tesseract_->set_pix_grey(thresholder_->GetPixRectGrey()); + } else { + tesseract_->set_pix_thresholds(nullptr); + tesseract_->set_pix_grey(nullptr); + } + // Set the internal resolution that is used for layout parameters from the + // estimated resolution, rather than the image resolution, which may be + // fabricated, but we will use the image resolution, if there is one, to + // report output point sizes. + int estimated_res = ClipToRange(thresholder_->GetScaledEstimatedResolution(), + kMinCredibleResolution, + kMaxCredibleResolution); + if (estimated_res != thresholder_->GetScaledEstimatedResolution()) { + tprintf("Estimated internal resolution %d out of range! " + "Corrected to %d.\n", + thresholder_->GetScaledEstimatedResolution(), estimated_res); + } + tesseract_->set_source_resolution(estimated_res); + return true; +} + +/** Find lines from the image making the BLOCK_LIST. 
*/ +int TessBaseAPI::FindLines() { + if (thresholder_ == nullptr || thresholder_->IsEmpty()) { + tprintf("Please call SetImage before attempting recognition.\n"); + return -1; + } + if (recognition_done_) + ClearResults(); + if (!block_list_->empty()) { + return 0; + } + if (tesseract_ == nullptr) { + tesseract_ = new Tesseract; + #ifndef DISABLED_LEGACY_ENGINE + tesseract_->InitAdaptiveClassifier(nullptr); + #endif + } + if (tesseract_->pix_binary() == nullptr && + !Threshold(tesseract_->mutable_pix_binary())) { + return -1; + } + + tesseract_->PrepareForPageseg(); + +#ifndef DISABLED_LEGACY_ENGINE + if (tesseract_->textord_equation_detect) { + if (equ_detect_ == nullptr && datapath_ != nullptr) { + equ_detect_ = new EquationDetect(datapath_->string(), nullptr); + } + if (equ_detect_ == nullptr) { + tprintf("Warning: Could not set equation detector\n"); + } else { + tesseract_->SetEquationDetect(equ_detect_); + } + } +#endif // ndef DISABLED_LEGACY_ENGINE + + Tesseract* osd_tess = osd_tesseract_; + OSResults osr; + if (PSM_OSD_ENABLED(tesseract_->tessedit_pageseg_mode) && + osd_tess == nullptr) { + if (strcmp(language_->string(), "osd") == 0) { + osd_tess = tesseract_; + } else { + osd_tesseract_ = new Tesseract; + TessdataManager mgr(reader_); + if (datapath_ == nullptr) { + tprintf("Warning: Auto orientation and script detection requested," + " but data path is undefined\n"); + delete osd_tesseract_; + osd_tesseract_ = nullptr; + } else if (osd_tesseract_->init_tesseract(datapath_->string(), nullptr, + "osd", OEM_TESSERACT_ONLY, + nullptr, 0, nullptr, nullptr, + false, &mgr) == 0) { + osd_tess = osd_tesseract_; + osd_tesseract_->set_source_resolution( + thresholder_->GetSourceYResolution()); + } else { + tprintf("Warning: Auto orientation and script detection requested," + " but osd language failed to load\n"); + delete osd_tesseract_; + osd_tesseract_ = nullptr; + } + } + } + + if (tesseract_->SegmentPage(input_file_, block_list_, osd_tess, &osr) < 0) + return 
-1; + + #if 1 + return osr.best_result.orientation_id; + #else + + // If Devanagari is being recognized, we use different images for page seg + // and for OCR. + tesseract_->PrepareForTessOCR(block_list_, osd_tess, &osr); + return 0; + #endif +} + +/** Delete the pageres and clear the block list ready for a new page. */ +void TessBaseAPI::ClearResults() { + if (tesseract_ != nullptr) { + tesseract_->Clear(); + } + delete page_res_; + page_res_ = nullptr; + recognition_done_ = false; + if (block_list_ == nullptr) + block_list_ = new BLOCK_LIST; + else + block_list_->clear(); + if (paragraph_models_ != nullptr) { + paragraph_models_->delete_data_pointers(); + delete paragraph_models_; + paragraph_models_ = nullptr; + } +} + +/** + * Return the length of the output text string, as UTF8, assuming + * liberally two spacing marks after each word (as paragraphs end with two + * newlines), and assuming a single character reject marker for each rejected + * character. + * Also return the number of recognized blobs in blob_count. + */ +int TessBaseAPI::TextLength(int* blob_count) { + if (tesseract_ == nullptr || page_res_ == nullptr) + return 0; + + PAGE_RES_IT page_res_it(page_res_); + int total_length = 2; + int total_blobs = 0; + // Iterate over the data structures to extract the recognition result. + for (page_res_it.restart_page(); page_res_it.word () != nullptr; + page_res_it.forward()) { + WERD_RES *word = page_res_it.word(); + WERD_CHOICE* choice = word->best_choice; + if (choice != nullptr) { + total_blobs += choice->length() + 2; + total_length += choice->unichar_string().length() + 2; + for (int i = 0; i < word->reject_map.length(); ++i) { + if (word->reject_map[i].rejected()) + ++total_length; + } + } + } + if (blob_count != nullptr) + *blob_count = total_blobs; + return total_length; +} + +#ifndef DISABLED_LEGACY_ENGINE +/** + * Estimates the Orientation And Script of the image. + * Returns true if the image was processed successfully. 
+ */ +bool TessBaseAPI::DetectOS(OSResults* osr) { + if (tesseract_ == nullptr) + return false; + ClearResults(); + if (tesseract_->pix_binary() == nullptr && + !Threshold(tesseract_->mutable_pix_binary())) { + return false; + } + + if (input_file_ == nullptr) + input_file_ = new STRING(kInputFile); + return orientation_and_script_detection(*input_file_, osr, tesseract_) > 0; +} +#endif // ndef DISABLED_LEGACY_ENGINE + +void TessBaseAPI::set_min_orientation_margin(double margin) { + tesseract_->min_orientation_margin.set_value(margin); +} + +/** + * Return text orientation of each block as determined in an earlier page layout + * analysis operation. Orientation is returned as the number of ccw 90-degree + * rotations (in [0..3]) required to make the text in the block upright + * (readable). Note that this may not necessary be the block orientation + * preferred for recognition (such as the case of vertical CJK text). + * + * Also returns whether the text in the block is believed to have vertical + * writing direction (when in an upright page orientation). + * + * The returned array is of length equal to the number of text blocks, which may + * be less than the total number of blocks. The ordering is intended to be + * consistent with GetTextLines(). 
+ */ +void TessBaseAPI::GetBlockTextOrientations(int** block_orientation, + bool** vertical_writing) { + delete[] *block_orientation; + *block_orientation = nullptr; + delete[] *vertical_writing; + *vertical_writing = nullptr; + BLOCK_IT block_it(block_list_); + + block_it.move_to_first(); + int num_blocks = 0; + for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) { + if (!block_it.data()->pdblk.poly_block()->IsText()) { + continue; + } + ++num_blocks; + } + if (!num_blocks) { + tprintf("WARNING: Found no blocks\n"); + return; + } + *block_orientation = new int[num_blocks]; + *vertical_writing = new bool[num_blocks]; + block_it.move_to_first(); + int i = 0; + for (block_it.mark_cycle_pt(); !block_it.cycled_list(); + block_it.forward()) { + if (!block_it.data()->pdblk.poly_block()->IsText()) { + continue; + } + FCOORD re_rotation = block_it.data()->re_rotation(); + float re_theta = re_rotation.angle(); + FCOORD classify_rotation = block_it.data()->classify_rotation(); + float classify_theta = classify_rotation.angle(); + double rot_theta = - (re_theta - classify_theta) * 2.0 / M_PI; + if (rot_theta < 0) rot_theta += 4; + int num_rotations = static_cast(rot_theta + 0.5); + (*block_orientation)[i] = num_rotations; + // The classify_rotation is non-zero only if the text has vertical + // writing direction. 
+ (*vertical_writing)[i] = classify_rotation.y() != 0.0f; + ++i; + } +} + + +void TessBaseAPI::DetectParagraphs(bool after_text_recognition) { + int debug_level = 0; + GetIntVariable("paragraph_debug_level", &debug_level); + if (paragraph_models_ == nullptr) + paragraph_models_ = new GenericVector; + MutableIterator *result_it = GetMutableIterator(); + do { // Detect paragraphs for this block + GenericVector models; + ::tesseract::DetectParagraphs(debug_level, after_text_recognition, + result_it, &models); + *paragraph_models_ += models; + } while (result_it->Next(RIL_BLOCK)); + delete result_it; +} + +/** This method returns the string form of the specified unichar. */ +const char* TessBaseAPI::GetUnichar(int unichar_id) { + return tesseract_->unicharset.id_to_unichar(unichar_id); +} + +/** Return the pointer to the i-th dawg loaded into tesseract_ object. */ +const Dawg *TessBaseAPI::GetDawg(int i) const { + if (tesseract_ == nullptr || i >= NumDawgs()) return nullptr; + return tesseract_->getDict().GetDawg(i); +} + +/** Return the number of dawgs loaded into tesseract_ object. */ +int TessBaseAPI::NumDawgs() const { + return tesseract_ == nullptr ? 0 : tesseract_->getDict().NumDawgs(); +} + +/** Escape a char string - remove <>&"' with HTML codes. */ +STRING HOcrEscape(const char* text) { + STRING ret; + const char *ptr; + for (ptr = text; *ptr; ptr++) { + switch (*ptr) { + case '<': ret += "<"; break; + case '>': ret += ">"; break; + case '&': ret += "&"; break; + case '"': ret += """; break; + case '\'': ret += "'"; break; + default: ret += *ptr; + } + } + return ret; +} + + +#ifndef DISABLED_LEGACY_ENGINE + + +// ____________________________________________________________________________ +// Ocropus add-ons. + +/** Find lines from the image making the BLOCK_LIST. 
*/ +BLOCK_LIST* TessBaseAPI::FindLinesCreateBlockList() { + ASSERT_HOST(FindLines() == 0); + BLOCK_LIST* result = block_list_; + block_list_ = nullptr; + return result; +} + +/** + * Delete a block list. + * This is to keep BLOCK_LIST pointer opaque + * and let go of including the other headers. + */ +void TessBaseAPI::DeleteBlockList(BLOCK_LIST *block_list) { + delete block_list; +} + + +ROW *TessBaseAPI::MakeTessOCRRow(float baseline, + float xheight, + float descender, + float ascender) { + int32_t xstarts[] = {-32000}; + double quad_coeffs[] = {0, 0, baseline}; + return new ROW(1, + xstarts, + quad_coeffs, + xheight, + ascender - (baseline + xheight), + descender - baseline, + 0, + 0); +} + +/** Creates a TBLOB* from the whole pix. */ +TBLOB *TessBaseAPI::MakeTBLOB(Pix *pix) { + int width = pixGetWidth(pix); + int height = pixGetHeight(pix); + BLOCK block("a character", true, 0, 0, 0, 0, width, height); + + // Create C_BLOBs from the page + extract_edges(pix, &block); + + // Merge all C_BLOBs + C_BLOB_LIST *list = block.blob_list(); + C_BLOB_IT c_blob_it(list); + if (c_blob_it.empty()) + return nullptr; + // Move all the outlines to the first blob. + C_OUTLINE_IT ol_it(c_blob_it.data()->out_list()); + for (c_blob_it.forward(); + !c_blob_it.at_first(); + c_blob_it.forward()) { + C_BLOB *c_blob = c_blob_it.data(); + ol_it.add_list_after(c_blob->out_list()); + } + // Convert the first blob to the output TBLOB. + return TBLOB::PolygonalCopy(false, c_blob_it.data()); +} + +/** + * This method baseline normalizes a TBLOB in-place. The input row is used + * for normalization. The denorm is an optional parameter in which the + * normalization-antidote is returned. 
+ */ +void TessBaseAPI::NormalizeTBLOB(TBLOB *tblob, ROW *row, bool numeric_mode) { + TBOX box = tblob->bounding_box(); + float x_center = (box.left() + box.right()) / 2.0f; + float baseline = row->base_line(x_center); + float scale = kBlnXHeight / row->x_height(); + tblob->Normalize(nullptr, nullptr, nullptr, x_center, baseline, scale, scale, + 0.0f, static_cast(kBlnBaselineOffset), false, nullptr); +} + +/** + * Return a TBLOB * from the whole pix. + * To be freed later with delete. + */ +static TBLOB *make_tesseract_blob(float baseline, float xheight, + float descender, float ascender, + bool numeric_mode, Pix* pix) { + TBLOB *tblob = TessBaseAPI::MakeTBLOB(pix); + + // Normalize TBLOB + ROW *row = + TessBaseAPI::MakeTessOCRRow(baseline, xheight, descender, ascender); + TessBaseAPI::NormalizeTBLOB(tblob, row, numeric_mode); + delete row; + return tblob; +} + +/** + * Adapt to recognize the current image as the given character. + * The image must be preloaded into pix_binary_ and be just an image + * of a single character. + */ +void TessBaseAPI::AdaptToCharacter(const char *unichar_repr, + int length, + float baseline, + float xheight, + float descender, + float ascender) { + UNICHAR_ID id = tesseract_->unicharset.unichar_to_id(unichar_repr, length); + TBLOB *blob = make_tesseract_blob(baseline, xheight, descender, ascender, + tesseract_->classify_bln_numeric_mode, + tesseract_->pix_binary()); + float threshold; + float best_rating = -100; + + + // Classify to get a raw choice. 
+ BLOB_CHOICE_LIST choices; + tesseract_->AdaptiveClassifier(blob, &choices); + BLOB_CHOICE_IT choice_it; + choice_it.set_to_list(&choices); + for (choice_it.mark_cycle_pt(); !choice_it.cycled_list(); + choice_it.forward()) { + if (choice_it.data()->rating() > best_rating) { + best_rating = choice_it.data()->rating(); + } + } + + threshold = tesseract_->matcher_good_threshold; + + if (blob->outlines) + tesseract_->AdaptToChar(blob, id, kUnknownFontinfoId, threshold, + tesseract_->AdaptedTemplates); + delete blob; +} + + +PAGE_RES* TessBaseAPI::RecognitionPass1(BLOCK_LIST* block_list) { + auto *page_res = new PAGE_RES(false, block_list, + &(tesseract_->prev_word_best_choice_)); + tesseract_->recog_all_words(page_res, nullptr, nullptr, nullptr, 1); + return page_res; +} + +PAGE_RES* TessBaseAPI::RecognitionPass2(BLOCK_LIST* block_list, + PAGE_RES* pass1_result) { + if (!pass1_result) + pass1_result = new PAGE_RES(false, block_list, + &(tesseract_->prev_word_best_choice_)); + tesseract_->recog_all_words(pass1_result, nullptr, nullptr, nullptr, 2); + return pass1_result; +} + +struct TESS_CHAR : ELIST_LINK { + char *unicode_repr; + int length; // of unicode_repr + float cost; + TBOX box; + + TESS_CHAR(float _cost, const char *repr, int len = -1) : cost(_cost) { + length = (len == -1 ? strlen(repr) : len); + unicode_repr = new char[length + 1]; + strncpy(unicode_repr, repr, length); + } + + TESS_CHAR() + : unicode_repr(nullptr), + length(0), + cost(0.0f) + { // Satisfies ELISTIZE. 
+ } + ~TESS_CHAR() { + delete [] unicode_repr; + } +}; + +ELISTIZEH(TESS_CHAR) +ELISTIZE(TESS_CHAR) + +static void add_space(TESS_CHAR_IT* it) { + auto *t = new TESS_CHAR(0, " "); + it->add_after_then_move(t); +} + + +static float rating_to_cost(float rating) { + rating = 100 + rating; + // cuddled that to save from coverage profiler + // (I have never seen ratings worse than -100, + // but the check won't hurt) + if (rating < 0) rating = 0; + return rating; +} + +/** + * Extract the OCR results, costs (penalty points for uncertainty), + * and the bounding boxes of the characters. + */ +static void extract_result(TESS_CHAR_IT* out, + PAGE_RES* page_res) { + PAGE_RES_IT page_res_it(page_res); + int word_count = 0; + while (page_res_it.word() != nullptr) { + WERD_RES *word = page_res_it.word(); + const char *str = word->best_choice->unichar_string().string(); + const char *len = word->best_choice->unichar_lengths().string(); + TBOX real_rect = word->word->bounding_box(); + + if (word_count) + add_space(out); + int n = strlen(len); + for (int i = 0; i < n; i++) { + auto *tc = new TESS_CHAR(rating_to_cost(word->best_choice->rating()), + str, *len); + tc->box = real_rect.intersection(word->box_word->BlobBox(i)); + out->add_after_then_move(tc); + str += *len; + len++; + } + page_res_it.forward(); + word_count++; + } +} + +/** + * Extract the OCR results, costs (penalty points for uncertainty), + * and the bounding boxes of the characters. 
+ */ +int TessBaseAPI::TesseractExtractResult(char** text, + int** lengths, + float** costs, + int** x0, + int** y0, + int** x1, + int** y1, + PAGE_RES* page_res) { + TESS_CHAR_LIST tess_chars; + TESS_CHAR_IT tess_chars_it(&tess_chars); + extract_result(&tess_chars_it, page_res); + tess_chars_it.move_to_first(); + int n = tess_chars.length(); + int text_len = 0; + *lengths = new int[n]; + *costs = new float[n]; + *x0 = new int[n]; + *y0 = new int[n]; + *x1 = new int[n]; + *y1 = new int[n]; + int i = 0; + for (tess_chars_it.mark_cycle_pt(); + !tess_chars_it.cycled_list(); + tess_chars_it.forward(), i++) { + TESS_CHAR *tc = tess_chars_it.data(); + text_len += (*lengths)[i] = tc->length; + (*costs)[i] = tc->cost; + (*x0)[i] = tc->box.left(); + (*y0)[i] = tc->box.bottom(); + (*x1)[i] = tc->box.right(); + (*y1)[i] = tc->box.top(); + } + char *p = *text = new char[text_len]; + + tess_chars_it.move_to_first(); + for (tess_chars_it.mark_cycle_pt(); + !tess_chars_it.cycled_list(); + tess_chars_it.forward()) { + TESS_CHAR *tc = tess_chars_it.data(); + strncpy(p, tc->unicode_repr, tc->length); + p += tc->length; + } + return n; +} + +/** This method returns the features associated with the input blob. */ +// The resulting features are returned in int_features, which must be +// of size MAX_NUM_INT_FEATURES. The number of features is returned in +// num_features (or 0 if there was a failure). +// On return feature_outline_index is filled with an index of the outline +// corresponding to each feature in int_features. +// TODO(rays) Fix the caller to out outline_counts instead. 
+void TessBaseAPI::GetFeaturesForBlob(TBLOB* blob, + INT_FEATURE_STRUCT* int_features, + int* num_features, + int* feature_outline_index) { + GenericVector outline_counts; + GenericVector bl_features; + GenericVector cn_features; + INT_FX_RESULT_STRUCT fx_info; + tesseract_->ExtractFeatures(*blob, false, &bl_features, + &cn_features, &fx_info, &outline_counts); + if (cn_features.empty() || cn_features.size() > MAX_NUM_INT_FEATURES) { + *num_features = 0; + return; // Feature extraction failed. + } + *num_features = cn_features.size(); + memcpy(int_features, &cn_features[0], *num_features * sizeof(cn_features[0])); + // TODO(rays) Pass outline_counts back and simplify the calling code. + if (feature_outline_index != nullptr) { + int f = 0; + for (int i = 0; i < outline_counts.size(); ++i) { + while (f < outline_counts[i]) + feature_outline_index[f++] = i; + } + } +} + +// This method returns the row to which a box of specified dimensions would +// belong. If no good match is found, it returns nullptr. +ROW* TessBaseAPI::FindRowForBox(BLOCK_LIST* blocks, + int left, int top, int right, int bottom) { + TBOX box(left, bottom, right, top); + BLOCK_IT b_it(blocks); + for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) { + BLOCK* block = b_it.data(); + if (!box.major_overlap(block->pdblk.bounding_box())) + continue; + ROW_IT r_it(block->row_list()); + for (r_it.mark_cycle_pt(); !r_it.cycled_list(); r_it.forward()) { + ROW* row = r_it.data(); + if (!box.major_overlap(row->bounding_box())) + continue; + WERD_IT w_it(row->word_list()); + for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) { + WERD* word = w_it.data(); + if (box.major_overlap(word->bounding_box())) + return row; + } + } + } + return nullptr; +} + +/** Method to run adaptive classifier on a blob. 
*/ /* Classifies blob and copies at most num_max_matches results, in list order, into the parallel arrays unichar_ids and ratings; the number of entries written is stored in *num_matches_returned. */ +void TessBaseAPI::RunAdaptiveClassifier(TBLOB* blob, + int num_max_matches, + int* unichar_ids, + float* ratings, + int* num_matches_returned) { + auto* choices = new BLOB_CHOICE_LIST; /* heap-allocated list, released by the delete at the end of the function */ + tesseract_->AdaptiveClassifier(blob, choices); + BLOB_CHOICE_IT choices_it(choices); + int& index = *num_matches_returned; /* index aliases the caller's counter, so each ++index is written straight to *num_matches_returned */ + index = 0; + for (choices_it.mark_cycle_pt(); + !choices_it.cycled_list() && index < num_max_matches; + choices_it.forward()) { + BLOB_CHOICE* choice = choices_it.data(); + unichar_ids[index] = choice->unichar_id(); + ratings[index] = choice->rating(); + ++index; + } + *num_matches_returned = index; + delete choices; +} +#endif // ndef DISABLED_LEGACY_ENGINE + +} // namespace tesseract. diff --git a/third_party/ocr/tesseract-ocr/src/baseapi.h b/third_party/ocr/tesseract-ocr/src/baseapi.h new file mode 100644 index 00000000..fe12351b --- /dev/null +++ b/third_party/ocr/tesseract-ocr/src/baseapi.h @@ -0,0 +1,946 @@ +/////////////////////////////////////////////////////////////////////// +// File: baseapi.h +// Description: Simple API for calling tesseract. +// Author: Ray Smith +// +// (C) Copyright 2006, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +/////////////////////////////////////////////////////////////////////// + +#ifndef TESSERACT_API_BASEAPI_H_ +#define TESSERACT_API_BASEAPI_H_ + +#include +// To avoid collision with other typenames include the ABSOLUTE MINIMUM +// complexity of includes here.
Use forward declarations wherever possible +// and hide includes of complex types in baseapi.cpp. +#include "apitypes.h" +#include "pageiterator.h" +#include "platform.h" +#include "publictypes.h" +#include "resultiterator.h" +#include "serialis.h" +#include "tess_version.h" +#include "tesscallback.h" +#include "thresholder.h" +#include "unichar.h" + +template class GenericVector; +class PAGE_RES; +class PAGE_RES_IT; +class ParagraphModel; +struct BlamerBundle; +class BLOCK_LIST; +class DENORM; +class MATRIX; +class ROW; +class STRING; +class WERD; +struct Pix; +struct Box; +struct Pixa; +struct Boxa; +class ETEXT_DESC; +struct OSResults; +class TBOX; +class UNICHARSET; +class WERD_CHOICE_LIST; + +struct INT_FEATURE_STRUCT; +using INT_FEATURE = INT_FEATURE_STRUCT *; +struct TBLOB; + +namespace tesseract { + +class Dawg; +class Dict; +class EquationDetect; +class PageIterator; +class LTRResultIterator; +class ResultIterator; +class MutableIterator; +class TessResultRenderer; +class Tesseract; +class Trie; +class Wordrec; + +using DictFunc = int (Dict::*)(void *, const UNICHARSET &, UNICHAR_ID, bool) const; +using ProbabilityInContextFunc = double (Dict::*)(const char *, const char *, int, const char *, int); +using ParamsModelClassifyFunc = float (Dict::*)(const char *, void *); +using FillLatticeFunc = void (Wordrec::*)(const MATRIX &, const WERD_CHOICE_LIST &, const UNICHARSET &, BlamerBundle *); +typedef TessCallback4 + TruthCallback; + +/** + * Base class for all tesseract APIs. + * Specific classes can add ability to work on different inputs or produce + * different outputs. + * This class is mostly an interface layer on top of the Tesseract instance + * class to hide the data types so that users of this class don't have to + * include any other Tesseract headers. + */ +class TESS_API TessBaseAPI { + public: + TessBaseAPI(); + virtual ~TessBaseAPI(); + + int MyOSD(); + + /** + * Returns the version identifier as a static string. Do not delete. 
+ */ + static const char* Version(); + + /** + * If compiled with OpenCL AND an available OpenCL + * device is deemed faster than serial code, then + * "device" is populated with the cl_device_id + * and returns sizeof(cl_device_id) + * otherwise *device=nullptr and returns 0. + */ + static size_t getOpenCLDevice(void **device); + + /** + * Writes the thresholded image to stderr as a PBM file on receipt of a + * SIGSEGV, SIGFPE, or SIGBUS signal. (Linux/Unix only). + */ + static void CatchSignals(); + + /** + * Set the name of the input file. Needed for training and + * reading a UNLV zone file, and for searchable PDF output. + */ + void SetInputName(const char* name); + /** + * These functions are required for searchable PDF output. + * We need our hands on the input file so that we can include + * it in the PDF without transcoding. If that is not possible, + * we need the original image. Finally, resolution metadata + * is stored in the PDF so we need that as well. + */ + const char* GetInputName(); + // Takes ownership of the input pix. + void SetInputImage(Pix *pix); + Pix* GetInputImage(); + int GetSourceYResolution(); + const char* GetDatapath(); + + /** Set the name of the bonus output files. Needed only for debugging. */ + void SetOutputName(const char* name); + + /** + * Set the value of an internal "parameter." + * Supply the name of the parameter and the value as a string, just as + * you would in a config file. + * Returns false if the name lookup failed. + * Eg SetVariable("tessedit_char_blacklist", "xyz"); to ignore x, y and z. + * Or SetVariable("classify_bln_numeric_mode", "1"); to set numeric-only mode. + * SetVariable may be used before Init, but settings will revert to + * defaults on End(). + * + * Note: Must be called after Init(). Only works for non-init variables + * (init variables should be passed to Init()). 
+ */ + bool SetVariable(const char* name, const char* value); + bool SetDebugVariable(const char* name, const char* value); + + /** + * Returns true if the parameter was found among Tesseract parameters. + * Fills in value with the value of the parameter. + */ + bool GetIntVariable(const char *name, int *value) const; + bool GetBoolVariable(const char *name, bool *value) const; + bool GetDoubleVariable(const char *name, double *value) const; + + /** + * Returns the pointer to the string that represents the value of the + * parameter if it was found among Tesseract parameters. + */ + const char *GetStringVariable(const char *name) const; + + /** + * Print Tesseract parameters to the given file. + */ + void PrintVariables(FILE *fp) const; + + /** + * Get value of named variable as a string, if it exists. + */ + bool GetVariableAsString(const char *name, STRING *val); + + /** + * Instances are now mostly thread-safe and totally independent, + * but some global parameters remain. Basically it is safe to use multiple + * TessBaseAPIs in different threads in parallel, UNLESS: + * you use SetVariable on some of the Params in classify and textord. + * If you do, then the effect will be to change it for all your instances. + * + * Start tesseract. Returns zero on success and -1 on failure. + * NOTE that the only members that may be called before Init are those + * listed above here in the class definition. + * + * The datapath must be the name of the tessdata directory. + * The language is (usually) an ISO 639-3 string or nullptr will default to eng. + * It is entirely safe (and eventually will be efficient too) to call + * Init multiple times on the same instance to change language, or just + * to reset the classifier. + * The language may be a string of the form [~][+[~]]* indicating + * that multiple languages are to be loaded. Eg hin+eng will load Hindi and + * English. 
Languages may specify internally that they want to be loaded + * with one or more other languages, so the ~ sign is available to override + * that. Eg if hin were set to load eng by default, then hin+~eng would force + * loading only hin. The number of loaded languages is limited only by + * memory, with the caveat that loading additional languages will impact + * both speed and accuracy, as there is more work to do to decide on the + * applicable language, and there is more chance of hallucinating incorrect + * words. + * WARNING: On changing languages, all Tesseract parameters are reset + * back to their default values. (Which may vary between languages.) + * If you have a rare need to set a Variable that controls + * initialization for a second call to Init you should explicitly + * call End() and then use SetVariable before Init. This is only a very + * rare use case, since there are very few uses that require any parameters + * to be set before Init. + * + * If set_only_non_debug_params is true, only params that do not contain + * "debug" in the name will be set. + */ + int Init(const char* datapath, const char* language, OcrEngineMode mode, + char **configs, int configs_size, + const GenericVector *vars_vec, + const GenericVector *vars_values, + bool set_only_non_debug_params); + int Init(const char* datapath, const char* language, OcrEngineMode oem) { + return Init(datapath, language, oem, nullptr, 0, nullptr, nullptr, false); + } + int Init(const char* datapath, const char* language) { + return Init(datapath, language, OEM_DEFAULT, nullptr, 0, nullptr, nullptr, false); + } + // In-memory version reads the traineddata file directly from the given + // data[data_size] array, and/or reads data via a FileReader. 
+ int Init(const char* data, int data_size, const char* language, + OcrEngineMode mode, char** configs, int configs_size, + const GenericVector* vars_vec, + const GenericVector* vars_values, + bool set_only_non_debug_params, FileReader reader); + + /** + * Returns the languages string used in the last valid initialization. + * If the last initialization specified "deu+hin" then that will be + * returned. If hin loaded eng automatically as well, then that will + * not be included in this list. To find the languages actually + * loaded use GetLoadedLanguagesAsVector. + * The returned string should NOT be deleted. + */ + const char* GetInitLanguagesAsString() const; + + /** + * Returns the loaded languages in the vector of STRINGs. + * Includes all languages loaded by the last Init, including those loaded + * as dependencies of other loaded languages. + */ + void GetLoadedLanguagesAsVector(GenericVector* langs) const; + + /** + * Returns the available languages in the sorted vector of STRINGs. + */ + void GetAvailableLanguagesAsVector(GenericVector* langs) const; + + /** + * Init only the lang model component of Tesseract. The only functions + * that work after this init are SetVariable and IsValidWord. + * WARNING: temporary! This function will be removed from here and placed + * in a separate API at some future time. + */ + int InitLangMod(const char* datapath, const char* language); + + /** + * Init only for page layout analysis. Use only for calls to SetImage and + * AnalysePage. Calls that attempt recognition will generate an error. + */ + void InitForAnalysePage(); + + /** + * Read a "config" file containing a set of param, value pairs. + * Searches the standard places: tessdata/configs, tessdata/tessconfigs + * and also accepts a relative or absolute path name. + * Note: only non-init params will be set (init params are set by Init()). + */ + void ReadConfigFile(const char* filename); + /** Same as above, but only set debug params from the given config file. 
*/ + void ReadDebugConfigFile(const char* filename); + + /** + * Set the current page segmentation mode. Defaults to PSM_SINGLE_BLOCK. + * The mode is stored as an IntParam so it can also be modified by + * ReadConfigFile or SetVariable("tessedit_pageseg_mode", mode as string). + */ + void SetPageSegMode(PageSegMode mode); + + /** Return the current page segmentation mode. */ + PageSegMode GetPageSegMode() const; + + /** + * Recognize a rectangle from an image and return the result as a string. + * May be called many times for a single Init. + * Currently has no error checking. + * Greyscale of 8 and color of 24 or 32 bits per pixel may be given. + * Palette color images will not work properly and must be converted to + * 24 bit. + * Binary images of 1 bit per pixel may also be given but they must be + * byte packed with the MSB of the first byte being the first pixel, and a + * 1 represents WHITE. For binary images set bytes_per_pixel=0. + * The recognized text is returned as a char* which is coded + * as UTF8 and must be freed with the delete [] operator. + * + * Note that TesseractRect is the simplified convenience interface. + * For advanced uses, use SetImage, (optionally) SetRectangle, Recognize, + * and one or more of the Get*Text functions below. + */ + char* TesseractRect(const unsigned char* imagedata, + int bytes_per_pixel, int bytes_per_line, + int left, int top, int width, int height); + + /** + * Call between pages or documents etc to free up memory and forget + * adaptive data. + */ + void ClearAdaptiveClassifier(); + + /** + * @defgroup AdvancedAPI Advanced API + * The following methods break TesseractRect into pieces, so you can + * get hold of the thresholded image, get the text in different formats, + * get bounding boxes, confidences etc. + */ + /* @{ */ + + /** + * Provide an image for Tesseract to recognize. Format is as + * TesseractRect above. Copies the image buffer and converts to Pix. 
+ * SetImage clears all recognition results, and sets the rectangle to the + * full image, so it may be followed immediately by a GetUTF8Text, and it + * will automatically perform recognition. + */ + void SetImage(const unsigned char* imagedata, int width, int height, + int bytes_per_pixel, int bytes_per_line); + + /** + * Provide an image for Tesseract to recognize. As with SetImage above, + * Tesseract takes its own copy of the image, so it need not persist until + * after Recognize. + * Pix vs raw, which to use? + * Use Pix where possible. Tesseract uses Pix as its internal representation + * and it is therefore more efficient to provide a Pix directly. + */ + void SetImage(Pix* pix); + + /** + * Set the resolution of the source image in pixels per inch so font size + * information can be calculated in results. Call this after SetImage(). + */ + void SetSourceResolution(int ppi); + + /** + * Restrict recognition to a sub-rectangle of the image. Call after SetImage. + * Each SetRectangle clears the recognition results so multiple rectangles + * can be recognized with the same image. + */ + void SetRectangle(int left, int top, int width, int height); + + /** + * In extreme cases only, usually with a subclass of Thresholder, it + * is possible to provide a different Thresholder. The Thresholder may + * be preloaded with an image, settings etc, or they may be set after. + * Note that Tesseract takes ownership of the Thresholder and will + * delete it when it is replaced or the API is destructed. + */ + void SetThresholder(ImageThresholder* thresholder) { + delete thresholder_; + thresholder_ = thresholder; + ClearResults(); + } + + /** + * Get a copy of the internal thresholded image from Tesseract. + * Caller takes ownership of the Pix and must pixDestroy it. + * May be called any time after SetImage, or after TesseractRect.
+ */ + Pix* GetThresholdedImage(); + + /** + * Get the result of page layout analysis as a leptonica-style + * Boxa, Pixa pair, in reading order. + * Can be called before or after Recognize. + */ + Boxa* GetRegions(Pixa** pixa); + + /** + * Get the textlines as a leptonica-style + * Boxa, Pixa pair, in reading order. + * Can be called before or after Recognize. + * If raw_image is true, then extract from the original image instead of the + * thresholded image and pad by raw_padding pixels. + * If blockids is not nullptr, the block-id of each line is also returned as an + * array of one element per line. delete [] after use. + * If paraids is not nullptr, the paragraph-id of each line within its block is + * also returned as an array of one element per line. delete [] after use. + */ + Boxa* GetTextlines(bool raw_image, int raw_padding, + Pixa** pixa, int** blockids, int** paraids); + /* + Helper method to extract from the thresholded image. (most common usage) + */ + Boxa* GetTextlines(Pixa** pixa, int** blockids) { + return GetTextlines(false, 0, pixa, blockids, nullptr); + } + + /** + * Get textlines and strips of image regions as a leptonica-style Boxa, Pixa + * pair, in reading order. Enables downstream handling of non-rectangular + * regions. + * Can be called before or after Recognize. + * If blockids is not nullptr, the block-id of each line is also returned as an + * array of one element per line. delete [] after use. + */ + Boxa* GetStrips(Pixa** pixa, int** blockids); + + /** + * Get the words as a leptonica-style + * Boxa, Pixa pair, in reading order. + * Can be called before or after Recognize. + */ + Boxa* GetWords(Pixa** pixa); + + /** + * Gets the individual connected (text) components (created + * after pages segmentation step, but before recognition) + * as a leptonica-style Boxa, Pixa pair, in reading order. + * Can be called before or after Recognize. 
+ * Note: the caller is responsible for calling boxaDestroy() + * on the returned Boxa array and pixaDestroy() on cc array. + */ + Boxa* GetConnectedComponents(Pixa** cc); + + /** + * Get the given level kind of components (block, textline, word etc.) as a + * leptonica-style Boxa, Pixa pair, in reading order. + * Can be called before or after Recognize. + * If blockids is not nullptr, the block-id of each component is also returned + * as an array of one element per component. delete [] after use. + * If paraids is not nullptr, the paragraph-id of each component within its block + * is also returned as an array of one element per component. delete [] after + * use. + * If raw_image is true, then portions of the original image are extracted + * instead of the thresholded image and padded with raw_padding. + * If text_only is true, then only text components are returned. + */ + Boxa* GetComponentImages(PageIteratorLevel level, + bool text_only, bool raw_image, + int raw_padding, + Pixa** pixa, int** blockids, int** paraids); + // Helper function to get binary images with no padding (most common usage). + Boxa* GetComponentImages(const PageIteratorLevel level, + const bool text_only, + Pixa** pixa, int** blockids) { + return GetComponentImages(level, text_only, false, 0, pixa, blockids, nullptr); + } + + /** + * Returns the scale factor of the thresholded image that would be returned by + * GetThresholdedImage() and the various GetX() methods that call + * GetComponentImages(). + * Returns 0 if no thresholder has been set. + */ + int GetThresholdedImageScaleFactor() const; + + /** + * Runs page layout analysis in the mode set by SetPageSegMode. + * May optionally be called prior to Recognize to get access to just + * the page layout results. Returns an iterator to the results. + * If merge_similar_words is true, words are combined where suitable for use + * with a line recognizer.
Use if you want to use AnalyseLayout to find the + * textlines, and then want to process textline fragments with an external + * line recognizer. + * Returns nullptr on error or an empty page. + * The returned iterator must be deleted after use. + * WARNING! This class points to data held within the TessBaseAPI class, and + * therefore can only be used while the TessBaseAPI class still exists and + * has not been subjected to a call of Init, SetImage, Recognize, Clear, End, + * DetectOS, or anything else that changes the internal PAGE_RES. + */ + PageIterator* AnalyseLayout(); + PageIterator* AnalyseLayout(bool merge_similar_words); + + /** + * Recognize the image from SetAndThresholdImage, generating Tesseract + * internal structures. Returns 0 on success. + * Optional. The Get*Text functions below will call Recognize if needed. + * After Recognize, the output is kept internally until the next SetImage. + */ + int Recognize(ETEXT_DESC* monitor); + + /** + * Methods to retrieve information after SetAndThresholdImage(), + * Recognize() or TesseractRect(). (Recognize is called implicitly if needed.) + */ + + #ifndef DISABLED_LEGACY_ENGINE + /** Variant on Recognize used for testing chopper. */ + int RecognizeForChopTest(ETEXT_DESC* monitor); + #endif + + /** + * Turns images into symbolic text. + * + * filename can point to a single image, a multi-page TIFF, + * or a plain text list of image filenames. + * + * retry_config is useful for debugging. If not nullptr, you can fall + * back to an alternate configuration if a page fails for some + * reason. + * + * timeout_millisec terminates processing if any single page + * takes too long. Set to 0 for unlimited time. + * + * renderer is responsible for creating the output. For example, + * use the TessTextRenderer if you want plaintext output, or + * the TessPDFRenderer to produce searchable PDF. + * + * If tessedit_page_number is non-negative, will only process that + * single page.
Works for multi-page tiff file, or filelist. + * + * Returns true if successful, false on error. + */ + bool ProcessPages(const char* filename, const char* retry_config, + int timeout_millisec, TessResultRenderer* renderer); + // Does the real work of ProcessPages. + bool ProcessPagesInternal(const char* filename, const char* retry_config, + int timeout_millisec, TessResultRenderer* renderer); + + /** + * Turn a single image into symbolic text. + * + * The pix is the image processed. filename and page_index are + * metadata used by side-effect processes, such as reading a box + * file or formatting as hOCR. + * + * See ProcessPages for descriptions of other parameters. + */ + bool ProcessPage(Pix* pix, int page_index, const char* filename, + const char* retry_config, int timeout_millisec, + TessResultRenderer* renderer); + + /** + * Get a reading-order iterator to the results of LayoutAnalysis and/or + * Recognize. The returned iterator must be deleted after use. + * WARNING! This class points to data held within the TessBaseAPI class, and + * therefore can only be used while the TessBaseAPI class still exists and + * has not been subjected to a call of Init, SetImage, Recognize, Clear, End, + * DetectOS, or anything else that changes the internal PAGE_RES. + */ + ResultIterator* GetIterator(); + + /** + * Get a mutable iterator to the results of LayoutAnalysis and/or Recognize. + * The returned iterator must be deleted after use. + * WARNING! This class points to data held within the TessBaseAPI class, and + * therefore can only be used while the TessBaseAPI class still exists and + * has not been subjected to a call of Init, SetImage, Recognize, Clear, End, + * DetectOS, or anything else that changes the internal PAGE_RES. + */ + MutableIterator* GetMutableIterator(); + + /** + * The recognized text is returned as a char* which is coded + * as UTF8 and must be freed with the delete [] operator.
+ */ + char* GetUTF8Text(); + + /** + * Make a HTML-formatted string with hOCR markup from the internal + * data structures. + * page_number is 0-based but will appear in the output as 1-based. + * monitor can be used to + * cancel the recognition + * receive progress callbacks + * Returned string must be freed with the delete [] operator. + */ + char* GetHOCRText(ETEXT_DESC* monitor, int page_number); + + /** + * Make a HTML-formatted string with hOCR markup from the internal + * data structures. + * page_number is 0-based but will appear in the output as 1-based. + * Returned string must be freed with the delete [] operator. + */ + char* GetHOCRText(int page_number); + + /** + * Make an XML-formatted string with Alto markup from the internal + * data structures. + */ + char* GetAltoText(ETEXT_DESC* monitor, int page_number); + + + /** + * Make an XML-formatted string with Alto markup from the internal + * data structures. + */ + char* GetAltoText(int page_number); + + /** + * Make a TSV-formatted string from the internal data structures. + * page_number is 0-based but will appear in the output as 1-based. + * Returned string must be freed with the delete [] operator. + */ + char* GetTSVText(int page_number); + + /** + * Make a box file for LSTM training from the internal data structures. + * Constructs coordinates in the original image - not just the rectangle. + * page_number is a 0-based page index that will appear in the box file. + * Returned string must be freed with the delete [] operator. + */ + char* GetLSTMBoxText(int page_number); + + /** + * The recognized text is returned as a char* which is coded in the same + * format as a box file used in training. + * Constructs coordinates in the original image - not just the rectangle. + * page_number is a 0-based page index that will appear in the box file. + * Returned string must be freed with the delete [] operator. 
+ */ + char* GetBoxText(int page_number); + + /** + * The recognized text is returned as a char* which is coded in the same + * format as a WordStr box file used in training. + * page_number is a 0-based page index that will appear in the box file. + * Returned string must be freed with the delete [] operator. + */ + char* GetWordStrBoxText(int page_number); + + /** + * The recognized text is returned as a char* which is coded + * as UNLV format Latin-1 with specific reject and suspect codes. + * Returned string must be freed with the delete [] operator. + */ + char* GetUNLVText(); + + /** + * Detect the orientation of the input image and apparent script (alphabet). + * orient_deg is the detected clockwise rotation of the input image in degrees + * (0, 90, 180, 270) + * orient_conf is the confidence (15.0 is reasonably confident) + * script_name is an ASCII string, the name of the script, e.g. "Latin" + * script_conf is confidence level in the script + * Returns true on success and writes values to each parameter as an output + */ + bool DetectOrientationScript(int* orient_deg, float* orient_conf, + const char** script_name, float* script_conf); + + /** + * The recognized text is returned as a char* which is coded + * as UTF8 and must be freed with the delete [] operator. + * page_number is a 0-based page index that will appear in the osd file. + */ + char* GetOsdText(int page_number); + + /** Returns the (average) confidence value between 0 and 100. */ + int MeanTextConf(); + /** + * Returns all word confidences (between 0 and 100) in an array, terminated + * by -1. The calling function must delete [] after use. + * The number of confidences should correspond to the number of space- + * delimited words in GetUTF8Text. + */ + int* AllWordConfidences(); + +#ifndef DISABLED_LEGACY_ENGINE + /** + * Applies the given word to the adaptive classifier if possible. 
+ * The word must be SPACE-DELIMITED UTF-8 - l i k e t h i s , so it can + * tell the boundaries of the graphemes. + * Assumes that SetImage/SetRectangle have been used to set the image + * to the given word. The mode arg should be PSM_SINGLE_WORD or + * PSM_CIRCLE_WORD, as that will be used to control layout analysis. + * The currently set PageSegMode is preserved. + * Returns false if adaption was not possible for some reason. + */ + bool AdaptToWordStr(PageSegMode mode, const char* wordstr); +#endif // ndef DISABLED_LEGACY_ENGINE + + /** + * Free up recognition results and any stored image data, without actually + * freeing any recognition data that would be time-consuming to reload. + * Afterwards, you must call SetImage or TesseractRect before doing + * any Recognize or Get* operation. + */ + void Clear(); + + /** + * Close down tesseract and free up all memory. End() is equivalent to + * destructing and reconstructing your TessBaseAPI. + * Once End() has been used, none of the other API functions may be used + * other than Init and anything declared above it in the class definition. + */ + void End(); + + /** + * Clear any library-level memory caches. + * There are a variety of expensive-to-load constant data structures (mostly + * language dictionaries) that are cached globally -- surviving the Init() + * and End() of individual TessBaseAPI's. This function allows the clearing + * of these caches. + **/ + static void ClearPersistentCache(); + + /** + * Check whether a word is valid according to Tesseract's language model + * @return 0 if the word is invalid, non-zero if valid. + * @warning temporary! This function will be removed from here and placed + * in a separate API at some future time. + */ + int IsValidWord(const char *word); + // Returns true if utf8_character is defined in the UniCharset. 
+ bool IsValidCharacter(const char *utf8_character); + + + bool GetTextDirection(int* out_offset, float* out_slope); + + /** Sets Dict::letter_is_okay_ function to point to the given function. */ + void SetDictFunc(DictFunc f); + + /** Sets Dict::probability_in_context_ function to point to the given + * function. + */ + void SetProbabilityInContextFunc(ProbabilityInContextFunc f); + + /** + * Estimates the Orientation And Script of the image. + * @return true if the image was processed successfully. + */ + bool DetectOS(OSResults*); + + /** + * Return text orientation of each block as determined by an earlier run + * of layout analysis. + */ + void GetBlockTextOrientations(int** block_orientation, + bool** vertical_writing); + + + #ifndef DISABLED_LEGACY_ENGINE + + /** Sets Wordrec::fill_lattice_ function to point to the given function. */ + void SetFillLatticeFunc(FillLatticeFunc f); + + /** Find lines from the image making the BLOCK_LIST. */ + BLOCK_LIST* FindLinesCreateBlockList(); + + /** + * Delete a block list. + * This is to keep BLOCK_LIST pointer opaque + * and let go of including the other headers. + */ + static void DeleteBlockList(BLOCK_LIST* block_list); + + /** Returns a ROW object created from the input row specification. */ + static ROW *MakeTessOCRRow(float baseline, float xheight, + float descender, float ascender); + + /** Returns a TBLOB corresponding to the entire input image. */ + static TBLOB *MakeTBLOB(Pix *pix); + + /** + * This method baseline normalizes a TBLOB in-place. The input row is used + * for normalization. The denorm is an optional parameter in which the + * normalization-antidote is returned. + */ + static void NormalizeTBLOB(TBLOB *tblob, ROW *row, bool numeric_mode); + + /** This method returns the features associated with the input image. 
*/ + void GetFeaturesForBlob(TBLOB* blob, INT_FEATURE_STRUCT* int_features, + int* num_features, int* feature_outline_index); + + /** + * This method returns the row to which a box of specified dimensions would + * belong. If no good match is found, it returns nullptr. + */ + static ROW* FindRowForBox(BLOCK_LIST* blocks, int left, int top, + int right, int bottom); + + /** + * Method to run adaptive classifier on a blob. + * It returns at max num_max_matches results. + */ + void RunAdaptiveClassifier(TBLOB* blob, + int num_max_matches, + int* unichar_ids, + float* ratings, + int* num_matches_returned); +#endif // ndef DISABLED_LEGACY_ENGINE + + /** This method returns the string form of the specified unichar. */ + const char* GetUnichar(int unichar_id); + + /** Return the pointer to the i-th dawg loaded into tesseract_ object. */ + const Dawg *GetDawg(int i) const; + + /** Return the number of dawgs loaded into tesseract_ object. */ + int NumDawgs() const; + + Tesseract* tesseract() const { return tesseract_; } + + OcrEngineMode oem() const { return last_oem_requested_; } + + void InitTruthCallback(TruthCallback *cb) { truth_cb_ = cb; } + + void set_min_orientation_margin(double margin); + /* @} */ + + protected: + + /** Common code for setting the image. Returns true if Init has been called. */ + TESS_LOCAL bool InternalSetImage(); + + /** + * Run the thresholder to make the thresholded image. If pix is not nullptr, + * the source is thresholded to pix instead of the internal IMAGE. + */ + TESS_LOCAL virtual bool Threshold(Pix** pix); + + /** + * Find lines from the image making the BLOCK_LIST. + * @return 0 on success. + */ + TESS_LOCAL int FindLines(); + + /** Delete the pageres and block list ready for a new page. */ + void ClearResults(); + + /** + * Return an LTR Result Iterator -- used only for training, as we really want + * to ignore all BiDi smarts at that point. + * delete once you're done with it. 
+ */ + TESS_LOCAL LTRResultIterator* GetLTRIterator(); + + /** + * Return the length of the output text string, as UTF8, assuming + * one newline per line and one per block, with a terminator, + * and assuming a single character reject marker for each rejected character. + * Also return the number of recognized blobs in blob_count. + */ + TESS_LOCAL int TextLength(int* blob_count); + + //// paragraphs.cpp //////////////////////////////////////////////////// + TESS_LOCAL void DetectParagraphs(bool after_text_recognition); + + #ifndef DISABLED_LEGACY_ENGINE + + /** @defgroup ocropusAddOns ocropus add-ons */ + /* @{ */ + + /** + * Adapt to recognize the current image as the given character. + * The image must be preloaded and be just an image of a single character. + */ + TESS_LOCAL void AdaptToCharacter(const char *unichar_repr, + int length, + float baseline, + float xheight, + float descender, + float ascender); + + /** Recognize text doing one pass only, using settings for a given pass. */ + TESS_LOCAL PAGE_RES* RecognitionPass1(BLOCK_LIST* block_list); + + TESS_LOCAL PAGE_RES* RecognitionPass2(BLOCK_LIST* block_list, + PAGE_RES* pass1_result); + + /** + * Extract the OCR results, costs (penalty points for uncertainty), + * and the bounding boxes of the characters. + */ + TESS_LOCAL static int TesseractExtractResult(char** text, + int** lengths, + float** costs, + int** x0, + int** y0, + int** x1, + int** y1, + PAGE_RES* page_res); + + TESS_LOCAL const PAGE_RES* GetPageRes() const { return page_res_; } + /* @} */ +#endif // ndef DISABLED_LEGACY_ENGINE + + protected: + Tesseract* tesseract_; ///< The underlying data object. + Tesseract* osd_tesseract_; ///< For orientation & script detection. + EquationDetect* equ_detect_; ///* paragraph_models_; + BLOCK_LIST* block_list_; ///< The page layout. + PAGE_RES* page_res_; ///< The page-level data. + STRING* input_file_; ///< Name used by training code. + STRING* output_file_; ///< Name used by debug code. 
+ STRING* datapath_; ///< Current location of tessdata. + STRING* language_; ///< Last initialized language. + OcrEngineMode last_oem_requested_; ///< Last ocr language mode requested. + bool recognition_done_; ///< page_res_ contains recognition data. + TruthCallback *truth_cb_; /// fxn for setting truth_* in WERD_RES + + /** + * @defgroup ThresholderParams Thresholder Parameters + * Parameters saved from the Thresholder. Needed to rebuild coordinates. + */ + /* @{ */ + int rect_left_; + int rect_top_; + int rect_width_; + int rect_height_; + int image_width_; + int image_height_; + /* @} */ + + private: + // A list of image filenames gets special consideration + bool ProcessPagesFileList(FILE *fp, + STRING *buf, + const char* retry_config, int timeout_millisec, + TessResultRenderer* renderer, + int tessedit_page_number); + // TIFF supports multipage so gets special consideration. + bool ProcessPagesMultipageTiff(const unsigned char *data, + size_t size, + const char* filename, + const char* retry_config, + int timeout_millisec, + TessResultRenderer* renderer, + int tessedit_page_number); + // There's currently no way to pass a document title from the + // Tesseract command line, and we have multiple places that choose + // to set the title to an empty string. Using a single named + // variable will hopefully reduce confusion if the situation changes + // in the future. + const char *unknown_title_ = ""; +}; // class TessBaseAPI. + +/** Escape a char string - remove &<>"' with HTML codes. */ +STRING HOcrEscape(const char* text); +} // namespace tesseract. 
+ +#endif // TESSERACT_API_BASEAPI_H_ diff --git a/third_party/ocr/tesseract-ocr/src/capi.cpp b/third_party/ocr/tesseract-ocr/src/capi.cpp new file mode 100644 index 00000000..c57ab30a --- /dev/null +++ b/third_party/ocr/tesseract-ocr/src/capi.cpp @@ -0,0 +1,919 @@ +/////////////////////////////////////////////////////////////////////// +// File: capi.cpp +// Description: C-API TessBaseAPI +// +// (C) Copyright 2012, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +/////////////////////////////////////////////////////////////////////// + +#ifndef TESS_CAPI_INCLUDE_BASEAPI +# define TESS_CAPI_INCLUDE_BASEAPI +#endif +#include "capi.h" +#include "genericvector.h" +#include "strngs.h" + +TESS_API int MyOSD(TessBaseAPI* api) { + return api->MyOSD(); +} + +TESS_API const char* TESS_CALL TessVersion() { + return TessBaseAPI::Version(); +} + +TESS_API void TESS_CALL TessDeleteText(const char* text) { + delete[] text; +} + +TESS_API void TESS_CALL TessDeleteTextArray(char** arr) { + for (char** pos = arr; *pos != nullptr; ++pos) { + delete[] * pos; + } + delete[] arr; +} + +TESS_API void TESS_CALL TessDeleteIntArray(const int* arr) { + delete[] arr; +} + +#ifndef DISABLED_LEGACY_ENGINE +TESS_API void TESS_CALL TessDeleteBlockList(BLOCK_LIST* block_list) { + TessBaseAPI::DeleteBlockList(block_list); +} +#endif + +TESS_API TessResultRenderer* TESS_CALL +TessTextRendererCreate(const char* outputbase) { + return new TessTextRenderer(outputbase); +} + +TESS_API 
TessResultRenderer* TESS_CALL +TessHOcrRendererCreate(const char* outputbase) { + return new TessHOcrRenderer(outputbase); +} + +TESS_API TessResultRenderer* TESS_CALL +TessHOcrRendererCreate2(const char* outputbase, BOOL font_info) { + return new TessHOcrRenderer(outputbase, font_info != 0); +} + +TESS_API TessResultRenderer* TESS_CALL +TessAltoRendererCreate(const char* outputbase) { + return new TessAltoRenderer(outputbase); +} + +TESS_API TessResultRenderer* TESS_CALL +TessTsvRendererCreate(const char* outputbase) { + return new TessTsvRenderer(outputbase); +} + +TESS_API TessResultRenderer* TESS_CALL TessPDFRendererCreate( + const char* outputbase, const char* datadir, BOOL textonly) { + return new TessPDFRenderer(outputbase, datadir, textonly != 0); +} + +TESS_API TessResultRenderer* TESS_CALL +TessUnlvRendererCreate(const char* outputbase) { + return new TessUnlvRenderer(outputbase); +} + +TESS_API TessResultRenderer* TESS_CALL +TessBoxTextRendererCreate(const char* outputbase) { + return new TessBoxTextRenderer(outputbase); +} + +TESS_API TessResultRenderer* TESS_CALL +TessWordStrBoxRendererCreate(const char* outputbase) { + return new TessWordStrBoxRenderer(outputbase); +} + +TESS_API TessResultRenderer* TESS_CALL +TessLSTMBoxRendererCreate(const char* outputbase) { + return new TessLSTMBoxRenderer(outputbase); +} + +TESS_API void TESS_CALL TessDeleteResultRenderer(TessResultRenderer* renderer) { + delete renderer; +} + +TESS_API void TESS_CALL TessResultRendererInsert(TessResultRenderer* renderer, + TessResultRenderer* next) { + renderer->insert(next); +} + +TESS_API TessResultRenderer* TESS_CALL +TessResultRendererNext(TessResultRenderer* renderer) { + return renderer->next(); +} + +TESS_API BOOL TESS_CALL TessResultRendererBeginDocument( + TessResultRenderer* renderer, const char* title) { + return static_cast(renderer->BeginDocument(title)); +} + +TESS_API BOOL TESS_CALL TessResultRendererAddImage(TessResultRenderer* renderer, + TessBaseAPI* api) { + 
return static_cast(renderer->AddImage(api)); +} + +TESS_API BOOL TESS_CALL +TessResultRendererEndDocument(TessResultRenderer* renderer) { + return static_cast(renderer->EndDocument()); +} + +TESS_API const char* TESS_CALL +TessResultRendererExtention(TessResultRenderer* renderer) { + return renderer->file_extension(); +} + +TESS_API const char* TESS_CALL +TessResultRendererTitle(TessResultRenderer* renderer) { + return renderer->title(); +} + +TESS_API int TESS_CALL +TessResultRendererImageNum(TessResultRenderer* renderer) { + return renderer->imagenum(); +} + +TESS_API TessBaseAPI* TESS_CALL TessBaseAPICreate() { + return new TessBaseAPI; +} + +TESS_API void TESS_CALL TessBaseAPIDelete(TessBaseAPI* handle) { + delete handle; +} + +TESS_API size_t TESS_CALL TessBaseAPIGetOpenCLDevice(TessBaseAPI* /*handle*/, + void** device) { + return TessBaseAPI::getOpenCLDevice(device); +} + +TESS_API void TESS_CALL TessBaseAPISetInputName(TessBaseAPI* handle, + const char* name) { + handle->SetInputName(name); +} + +TESS_API const char* TESS_CALL TessBaseAPIGetInputName(TessBaseAPI* handle) { + return handle->GetInputName(); +} + +TESS_API void TESS_CALL TessBaseAPISetInputImage(TessBaseAPI* handle, + Pix* pix) { + handle->SetInputImage(pix); +} + +TESS_API Pix* TESS_CALL TessBaseAPIGetInputImage(TessBaseAPI* handle) { + return handle->GetInputImage(); +} + +TESS_API int TESS_CALL TessBaseAPIGetSourceYResolution(TessBaseAPI* handle) { + return handle->GetSourceYResolution(); +} + +TESS_API const char* TESS_CALL TessBaseAPIGetDatapath(TessBaseAPI* handle) { + return handle->GetDatapath(); +} + +TESS_API void TESS_CALL TessBaseAPISetOutputName(TessBaseAPI* handle, + const char* name) { + handle->SetOutputName(name); +} + +TESS_API BOOL TESS_CALL TessBaseAPISetVariable(TessBaseAPI* handle, + const char* name, + const char* value) { + return static_cast(handle->SetVariable(name, value)); +} + +TESS_API BOOL TESS_CALL TessBaseAPISetDebugVariable(TessBaseAPI* handle, + const char* 
name, + const char* value) { + return static_cast(handle->SetDebugVariable(name, value)); +} + +TESS_API BOOL TESS_CALL TessBaseAPIGetIntVariable(const TessBaseAPI* handle, + const char* name, + int* value) { + return static_cast(handle->GetIntVariable(name, value)); +} + +TESS_API BOOL TESS_CALL TessBaseAPIGetBoolVariable(const TessBaseAPI* handle, + const char* name, + BOOL* value) { + bool boolValue; + bool result = handle->GetBoolVariable(name, &boolValue); + if (result) { + *value = static_cast(boolValue); + } + return static_cast(result); +} + +TESS_API BOOL TESS_CALL TessBaseAPIGetDoubleVariable(const TessBaseAPI* handle, + const char* name, + double* value) { + return static_cast(handle->GetDoubleVariable(name, value)); +} + +TESS_API const char* TESS_CALL +TessBaseAPIGetStringVariable(const TessBaseAPI* handle, const char* name) { + return handle->GetStringVariable(name); +} + +TESS_API void TESS_CALL TessBaseAPIPrintVariables(const TessBaseAPI* handle, + FILE* fp) { + handle->PrintVariables(fp); +} + +TESS_API BOOL TESS_CALL TessBaseAPIPrintVariablesToFile( + const TessBaseAPI* handle, const char* filename) { + FILE* fp = fopen(filename, "w"); + if (fp != nullptr) { + handle->PrintVariables(fp); + fclose(fp); + return TRUE; + } + return FALSE; +} + +TESS_API BOOL TESS_CALL TessBaseAPIGetVariableAsString(TessBaseAPI* handle, + const char* name, + STRING* val) { + return static_cast(handle->GetVariableAsString(name, val)); +} + +TESS_API int TESS_CALL TessBaseAPIInit4( + TessBaseAPI* handle, const char* datapath, const char* language, + TessOcrEngineMode mode, char** configs, int configs_size, char** vars_vec, + char** vars_values, size_t vars_vec_size, BOOL set_only_non_debug_params) { + GenericVector varNames; + GenericVector varValues; + if (vars_vec != nullptr && vars_values != nullptr) { + for (size_t i = 0; i < vars_vec_size; i++) { + varNames.push_back(STRING(vars_vec[i])); + varValues.push_back(STRING(vars_values[i])); + } + } + + return 
handle->Init(datapath, language, mode, configs, configs_size, + &varNames, &varValues, set_only_non_debug_params != 0); +} + +TESS_API int TESS_CALL TessBaseAPIInit1(TessBaseAPI* handle, + const char* datapath, + const char* language, + TessOcrEngineMode oem, char** configs, + int configs_size) { + return handle->Init(datapath, language, oem, configs, configs_size, nullptr, + nullptr, false); +} + +TESS_API int TESS_CALL TessBaseAPIInit2(TessBaseAPI* handle, + const char* datapath, + const char* language, + TessOcrEngineMode oem) { + return handle->Init(datapath, language, oem); +} + +TESS_API int TESS_CALL TessBaseAPIInit3(TessBaseAPI* handle, + const char* datapath, + const char* language) { + return handle->Init(datapath, language); +} + +TESS_API const char* TESS_CALL +TessBaseAPIGetInitLanguagesAsString(const TessBaseAPI* handle) { + return handle->GetInitLanguagesAsString(); +} + +TESS_API char** TESS_CALL +TessBaseAPIGetLoadedLanguagesAsVector(const TessBaseAPI* handle) { + GenericVector languages; + handle->GetLoadedLanguagesAsVector(&languages); + char** arr = new char*[languages.size() + 1]; + for (int index = 0; index < languages.size(); ++index) { + arr[index] = languages[index].strdup(); + } + arr[languages.size()] = nullptr; + return arr; +} + +TESS_API char** TESS_CALL +TessBaseAPIGetAvailableLanguagesAsVector(const TessBaseAPI* handle) { + GenericVector languages; + handle->GetAvailableLanguagesAsVector(&languages); + char** arr = new char*[languages.size() + 1]; + for (int index = 0; index < languages.size(); ++index) { + arr[index] = languages[index].strdup(); + } + arr[languages.size()] = nullptr; + return arr; +} + +#ifndef DISABLED_LEGACY_ENGINE +TESS_API int TESS_CALL TessBaseAPIInitLangMod(TessBaseAPI* handle, + const char* datapath, + const char* language) { + return handle->InitLangMod(datapath, language); +} +#endif + +TESS_API void TESS_CALL TessBaseAPIInitForAnalysePage(TessBaseAPI* handle) { + handle->InitForAnalysePage(); +} + 
+TESS_API void TESS_CALL TessBaseAPIReadConfigFile(TessBaseAPI* handle, + const char* filename) { + handle->ReadConfigFile(filename); +} + +TESS_API void TESS_CALL TessBaseAPIReadDebugConfigFile(TessBaseAPI* handle, + const char* filename) { + handle->ReadDebugConfigFile(filename); +} + +TESS_API void TESS_CALL TessBaseAPISetPageSegMode(TessBaseAPI* handle, + TessPageSegMode mode) { + handle->SetPageSegMode(mode); +} + +TESS_API TessPageSegMode TESS_CALL +TessBaseAPIGetPageSegMode(const TessBaseAPI* handle) { + return handle->GetPageSegMode(); +} + +TESS_API char* TESS_CALL TessBaseAPIRect(TessBaseAPI* handle, + const unsigned char* imagedata, + int bytes_per_pixel, + int bytes_per_line, int left, int top, + int width, int height) { + return handle->TesseractRect(imagedata, bytes_per_pixel, bytes_per_line, left, + top, width, height); +} + +#ifndef DISABLED_LEGACY_ENGINE +TESS_API void TESS_CALL +TessBaseAPIClearAdaptiveClassifier(TessBaseAPI* handle) { + handle->ClearAdaptiveClassifier(); +} +#endif + +TESS_API void TESS_CALL TessBaseAPISetImage(TessBaseAPI* handle, + const unsigned char* imagedata, + int width, int height, + int bytes_per_pixel, + int bytes_per_line) { + handle->SetImage(imagedata, width, height, bytes_per_pixel, bytes_per_line); +} + +TESS_API void TESS_CALL TessBaseAPISetImage2(TessBaseAPI* handle, + struct Pix* pix) { + return handle->SetImage(pix); +} + +TESS_API void TESS_CALL TessBaseAPISetSourceResolution(TessBaseAPI* handle, + int ppi) { + handle->SetSourceResolution(ppi); +} + +TESS_API void TESS_CALL TessBaseAPISetRectangle(TessBaseAPI* handle, int left, + int top, int width, + int height) { + handle->SetRectangle(left, top, width, height); +} + +TESS_API void TESS_CALL TessBaseAPISetThresholder( + TessBaseAPI* handle, TessImageThresholder* thresholder) { + handle->SetThresholder(thresholder); +} + +TESS_API struct Pix* TESS_CALL +TessBaseAPIGetThresholdedImage(TessBaseAPI* handle) { + return handle->GetThresholdedImage(); +} + 
+TESS_API struct Boxa* TESS_CALL TessBaseAPIGetRegions(TessBaseAPI* handle, + struct Pixa** pixa) { + return handle->GetRegions(pixa); +} + +TESS_API struct Boxa* TESS_CALL TessBaseAPIGetTextlines(TessBaseAPI* handle, + struct Pixa** pixa, + int** blockids) { + return handle->GetTextlines(pixa, blockids); +} + +TESS_API struct Boxa* TESS_CALL TessBaseAPIGetTextlines1( + TessBaseAPI* handle, const BOOL raw_image, const int raw_padding, + struct Pixa** pixa, int** blockids, int** paraids) { + return handle->GetTextlines(raw_image != 0, raw_padding, pixa, blockids, + paraids); +} + +TESS_API struct Boxa* TESS_CALL TessBaseAPIGetStrips(TessBaseAPI* handle, + struct Pixa** pixa, + int** blockids) { + return handle->GetStrips(pixa, blockids); +} + +TESS_API struct Boxa* TESS_CALL TessBaseAPIGetWords(TessBaseAPI* handle, + struct Pixa** pixa) { + return handle->GetWords(pixa); +} + +TESS_API struct Boxa* TESS_CALL +TessBaseAPIGetConnectedComponents(TessBaseAPI* handle, struct Pixa** cc) { + return handle->GetConnectedComponents(cc); +} + +TESS_API struct Boxa* TESS_CALL TessBaseAPIGetComponentImages( + TessBaseAPI* handle, TessPageIteratorLevel level, BOOL text_only, + struct Pixa** pixa, int** blockids) { + return handle->GetComponentImages(level, static_cast(text_only), pixa, + blockids); +} + +TESS_API struct Boxa* TESS_CALL TessBaseAPIGetComponentImages1( + TessBaseAPI* handle, const TessPageIteratorLevel level, + const BOOL text_only, const BOOL raw_image, const int raw_padding, + struct Pixa** pixa, int** blockids, int** paraids) { + return handle->GetComponentImages(level, static_cast(text_only), + raw_image != 0, raw_padding, pixa, blockids, + paraids); +} + +TESS_API int TESS_CALL +TessBaseAPIGetThresholdedImageScaleFactor(const TessBaseAPI* handle) { + return handle->GetThresholdedImageScaleFactor(); +} + +TESS_API TessPageIterator* TESS_CALL +TessBaseAPIAnalyseLayout(TessBaseAPI* handle) { + return handle->AnalyseLayout(); +} + + + +TESS_API int TESS_CALL 
TessBaseAPIRecognize(TessBaseAPI* handle, + ETEXT_DESC* monitor) { + return handle->Recognize(monitor); +} + +#ifndef DISABLED_LEGACY_ENGINE +TESS_API int TESS_CALL TessBaseAPIRecognizeForChopTest(TessBaseAPI* handle, + ETEXT_DESC* monitor) { + return handle->RecognizeForChopTest(monitor); +} +#endif + +TESS_API BOOL TESS_CALL TessBaseAPIProcessPages(TessBaseAPI* handle, + const char* filename, + const char* retry_config, + int timeout_millisec, + TessResultRenderer* renderer) { + return static_cast( + handle->ProcessPages(filename, retry_config, timeout_millisec, renderer)); +} + +TESS_API BOOL TESS_CALL TessBaseAPIProcessPage(TessBaseAPI* handle, + struct Pix* pix, int page_index, + const char* filename, + const char* retry_config, + int timeout_millisec, + TessResultRenderer* renderer) { + return static_cast(handle->ProcessPage( + pix, page_index, filename, retry_config, timeout_millisec, renderer)); +} + +TESS_API TessResultIterator* TESS_CALL +TessBaseAPIGetIterator(TessBaseAPI* handle) { + return handle->GetIterator(); +} + +TESS_API TessMutableIterator* TESS_CALL +TessBaseAPIGetMutableIterator(TessBaseAPI* handle) { + return handle->GetMutableIterator(); +} + +TESS_API char* TESS_CALL TessBaseAPIGetUTF8Text(TessBaseAPI* handle) { + return handle->GetUTF8Text(); +} + +TESS_API char* TESS_CALL TessBaseAPIGetHOCRText(TessBaseAPI* handle, + int page_number) { + return handle->GetHOCRText(nullptr, page_number); +} + +TESS_API char* TESS_CALL TessBaseAPIGetAltoText(TessBaseAPI* handle, + int page_number) { + return handle->GetAltoText(page_number); +} + +TESS_API char* TESS_CALL TessBaseAPIGetTsvText(TessBaseAPI* handle, + int page_number) { + return handle->GetTSVText(page_number); +} + +TESS_API char* TESS_CALL TessBaseAPIGetBoxText(TessBaseAPI* handle, + int page_number) { + return handle->GetBoxText(page_number); +} + +TESS_API char* TESS_CALL TessBaseAPIGetWordStrBoxText(TessBaseAPI* handle, + int page_number) { + return 
handle->GetWordStrBoxText(page_number); +} + +TESS_API char* TESS_CALL TessBaseAPIGetLSTMBoxText(TessBaseAPI* handle, + int page_number) { + return handle->GetLSTMBoxText(page_number); +} + +TESS_API char* TESS_CALL TessBaseAPIGetUNLVText(TessBaseAPI* handle) { + return handle->GetUNLVText(); +} + +TESS_API int TESS_CALL TessBaseAPIMeanTextConf(TessBaseAPI* handle) { + return handle->MeanTextConf(); +} + +TESS_API int* TESS_CALL TessBaseAPIAllWordConfidences(TessBaseAPI* handle) { + return handle->AllWordConfidences(); +} + +#ifndef DISABLED_LEGACY_ENGINE +TESS_API BOOL TESS_CALL TessBaseAPIAdaptToWordStr(TessBaseAPI* handle, + TessPageSegMode mode, + const char* wordstr) { + return static_cast(handle->AdaptToWordStr(mode, wordstr)); +} +#endif + +TESS_API void TESS_CALL TessBaseAPIClear(TessBaseAPI* handle) { + handle->Clear(); +} + +TESS_API void TESS_CALL TessBaseAPIEnd(TessBaseAPI* handle) { + handle->End(); +} + +TESS_API int TESS_CALL TessBaseAPIIsValidWord(TessBaseAPI* handle, + const char* word) { + return handle->IsValidWord(word); +} + +TESS_API BOOL TESS_CALL TessBaseAPIGetTextDirection(TessBaseAPI* handle, + int* out_offset, + float* out_slope) { + return static_cast(handle->GetTextDirection(out_offset, out_slope)); +} + +TESS_API void TESS_CALL TessBaseAPISetDictFunc(TessBaseAPI* handle, + TessDictFunc f) { + handle->SetDictFunc(f); +} + +TESS_API void TESS_CALL +TessBaseAPIClearPersistentCache(TessBaseAPI* /*handle*/) { + TessBaseAPI::ClearPersistentCache(); +} + +TESS_API void TESS_CALL TessBaseAPISetProbabilityInContextFunc( + TessBaseAPI* handle, TessProbabilityInContextFunc f) { + handle->SetProbabilityInContextFunc(f); +} + +#ifndef DISABLED_LEGACY_ENGINE + +TESS_API BOOL TESS_CALL TessBaseAPIDetectOrientationScript( + TessBaseAPI* handle, int* orient_deg, float* orient_conf, + const char** script_name, float* script_conf) { + bool success; + success = handle->DetectOrientationScript(orient_deg, orient_conf, + script_name, script_conf); + return 
static_cast(success); +} + +TESS_API void TESS_CALL TessBaseAPIGetFeaturesForBlob( + TessBaseAPI* handle, TBLOB* blob, INT_FEATURE_STRUCT* int_features, + int* num_features, int* FeatureOutlineIndex) { + handle->GetFeaturesForBlob(blob, int_features, num_features, + FeatureOutlineIndex); +} + +TESS_API ROW* TESS_CALL TessFindRowForBox(BLOCK_LIST* blocks, int left, int top, + int right, int bottom) { + return TessBaseAPI::FindRowForBox(blocks, left, top, right, bottom); +} + +TESS_API void TESS_CALL TessBaseAPIRunAdaptiveClassifier( + TessBaseAPI* handle, TBLOB* blob, int num_max_matches, int* unichar_ids, + float* ratings, int* num_matches_returned) { + handle->RunAdaptiveClassifier(blob, num_max_matches, unichar_ids, ratings, + num_matches_returned); +} + +#endif // ndef DISABLED_LEGACY_ENGINE + +TESS_API const char* TESS_CALL TessBaseAPIGetUnichar(TessBaseAPI* handle, + int unichar_id) { + return handle->GetUnichar(unichar_id); +} + +TESS_API const TessDawg* TESS_CALL TessBaseAPIGetDawg(const TessBaseAPI* handle, + int i) { + return handle->GetDawg(i); +} + +TESS_API int TESS_CALL TessBaseAPINumDawgs(const TessBaseAPI* handle) { + return handle->NumDawgs(); +} + +#ifndef DISABLED_LEGACY_ENGINE +TESS_API ROW* TESS_CALL TessMakeTessOCRRow(float baseline, float xheight, + float descender, float ascender) { + return TessBaseAPI::MakeTessOCRRow(baseline, xheight, descender, ascender); +} + +TESS_API TBLOB* TESS_CALL TessMakeTBLOB(struct Pix* pix) { + return TessBaseAPI::MakeTBLOB(pix); +} + +TESS_API void TESS_CALL TessNormalizeTBLOB(TBLOB* tblob, ROW* row, + BOOL numeric_mode) { + TessBaseAPI::NormalizeTBLOB(tblob, row, static_cast(numeric_mode)); +} +#endif // ndef DISABLED_LEGACY_ENGINE + +TESS_API TessOcrEngineMode TESS_CALL TessBaseAPIOem(const TessBaseAPI* handle) { + return handle->oem(); +} + +TESS_API void TESS_CALL TessBaseAPIInitTruthCallback(TessBaseAPI* handle, + TessTruthCallback* cb) { + handle->InitTruthCallback(cb); +} + +TESS_API void TESS_CALL 
TessBaseAPISetMinOrientationMargin(TessBaseAPI* handle, + double margin) { + handle->set_min_orientation_margin(margin); +} + +TESS_API void TESS_CALL TessBaseGetBlockTextOrientations( + TessBaseAPI* handle, int** block_orientation, bool** vertical_writing) { + handle->GetBlockTextOrientations(block_orientation, vertical_writing); +} + +#ifndef DISABLED_LEGACY_ENGINE +TESS_API BLOCK_LIST* TESS_CALL +TessBaseAPIFindLinesCreateBlockList(TessBaseAPI* handle) { + return handle->FindLinesCreateBlockList(); +} +#endif + +TESS_API void TESS_CALL TessPageIteratorDelete(TessPageIterator* handle) { + delete handle; +} + +TESS_API TessPageIterator* TESS_CALL +TessPageIteratorCopy(const TessPageIterator* handle) { + return new TessPageIterator(*handle); +} + +TESS_API void TESS_CALL TessPageIteratorBegin(TessPageIterator* handle) { + handle->Begin(); +} + +TESS_API BOOL TESS_CALL TessPageIteratorNext(TessPageIterator* handle, + TessPageIteratorLevel level) { + return static_cast(handle->Next(level)); +} + +TESS_API BOOL TESS_CALL TessPageIteratorIsAtBeginningOf( + const TessPageIterator* handle, TessPageIteratorLevel level) { + return static_cast(handle->IsAtBeginningOf(level)); +} + +TESS_API BOOL TESS_CALL TessPageIteratorIsAtFinalElement( + const TessPageIterator* handle, TessPageIteratorLevel level, + TessPageIteratorLevel element) { + return static_cast(handle->IsAtFinalElement(level, element)); +} + +TESS_API BOOL TESS_CALL TessPageIteratorBoundingBox( + const TessPageIterator* handle, TessPageIteratorLevel level, int* left, + int* top, int* right, int* bottom) { + return static_cast(handle->BoundingBox(level, left, top, right, bottom)); +} + +TESS_API TessPolyBlockType TESS_CALL +TessPageIteratorBlockType(const TessPageIterator* handle) { + return handle->BlockType(); +} + +TESS_API struct Pix* TESS_CALL TessPageIteratorGetBinaryImage( + const TessPageIterator* handle, TessPageIteratorLevel level) { + return handle->GetBinaryImage(level); +} + +TESS_API struct Pix* 
TESS_CALL TessPageIteratorGetImage( + const TessPageIterator* handle, TessPageIteratorLevel level, int padding, + struct Pix* original_image, int* left, int* top) { + return handle->GetImage(level, padding, original_image, left, top); +} + +TESS_API BOOL TESS_CALL TessPageIteratorBaseline(const TessPageIterator* handle, + TessPageIteratorLevel level, + int* x1, int* y1, int* x2, + int* y2) { + return static_cast(handle->Baseline(level, x1, y1, x2, y2)); +} + +TESS_API void TESS_CALL TessPageIteratorOrientation( + TessPageIterator* handle, TessOrientation* orientation, + TessWritingDirection* writing_direction, TessTextlineOrder* textline_order, + float* deskew_angle) { + handle->Orientation(orientation, writing_direction, textline_order, + deskew_angle); +} + +TESS_API void TESS_CALL TessPageIteratorParagraphInfo( + TessPageIterator* handle, TessParagraphJustification* justification, + BOOL* is_list_item, BOOL* is_crown, int* first_line_indent) { + bool bool_is_list_item; + bool bool_is_crown; + handle->ParagraphInfo(justification, &bool_is_list_item, &bool_is_crown, + first_line_indent); + if (is_list_item != nullptr) { + *is_list_item = static_cast(bool_is_list_item); + } + if (is_crown != nullptr) { + *is_crown = static_cast(bool_is_crown); + } +} + +TESS_API void TESS_CALL TessResultIteratorDelete(TessResultIterator* handle) { + delete handle; +} + +TESS_API TessResultIterator* TESS_CALL +TessResultIteratorCopy(const TessResultIterator* handle) { + return new TessResultIterator(*handle); +} + +TESS_API TessPageIterator* TESS_CALL +TessResultIteratorGetPageIterator(TessResultIterator* handle) { + return handle; +} + +TESS_API const TessPageIterator* TESS_CALL +TessResultIteratorGetPageIteratorConst(const TessResultIterator* handle) { + return handle; +} + +TESS_API TessChoiceIterator* TESS_CALL +TessResultIteratorGetChoiceIterator(const TessResultIterator* handle) { + return new TessChoiceIterator(*handle); +} + +TESS_API BOOL TESS_CALL 
TessResultIteratorNext(TessResultIterator* handle, + TessPageIteratorLevel level) { + return static_cast(handle->Next(level)); +} + +TESS_API char* TESS_CALL TessResultIteratorGetUTF8Text( + const TessResultIterator* handle, TessPageIteratorLevel level) { + return handle->GetUTF8Text(level); +} + +TESS_API float TESS_CALL TessResultIteratorConfidence( + const TessResultIterator* handle, TessPageIteratorLevel level) { + return handle->Confidence(level); +} + +TESS_API const char* TESS_CALL +TessResultIteratorWordRecognitionLanguage(const TessResultIterator* handle) { + return handle->WordRecognitionLanguage(); +} + +TESS_API const char* TESS_CALL TessResultIteratorWordFontAttributes( + const TessResultIterator* handle, BOOL* is_bold, BOOL* is_italic, + BOOL* is_underlined, BOOL* is_monospace, BOOL* is_serif, BOOL* is_smallcaps, + int* pointsize, int* font_id) { + bool bool_is_bold; + bool bool_is_italic; + bool bool_is_underlined; + bool bool_is_monospace; + bool bool_is_serif; + bool bool_is_smallcaps; + const char* ret = handle->WordFontAttributes( + &bool_is_bold, &bool_is_italic, &bool_is_underlined, &bool_is_monospace, + &bool_is_serif, &bool_is_smallcaps, pointsize, font_id); + if (is_bold != nullptr) { + *is_bold = static_cast(bool_is_bold); + } + if (is_italic != nullptr) { + *is_italic = static_cast(bool_is_italic); + } + if (is_underlined != nullptr) { + *is_underlined = static_cast(bool_is_underlined); + } + if (is_monospace != nullptr) { + *is_monospace = static_cast(bool_is_monospace); + } + if (is_serif != nullptr) { + *is_serif = static_cast(bool_is_serif); + } + if (is_smallcaps != nullptr) { + *is_smallcaps = static_cast(bool_is_smallcaps); + } + return ret; +} + +TESS_API BOOL TESS_CALL +TessResultIteratorWordIsFromDictionary(const TessResultIterator* handle) { + return static_cast(handle->WordIsFromDictionary()); +} + +TESS_API BOOL TESS_CALL +TessResultIteratorWordIsNumeric(const TessResultIterator* handle) { + return 
static_cast(handle->WordIsNumeric()); +} + +TESS_API BOOL TESS_CALL +TessResultIteratorSymbolIsSuperscript(const TessResultIterator* handle) { + return static_cast(handle->SymbolIsSuperscript()); +} + +TESS_API BOOL TESS_CALL +TessResultIteratorSymbolIsSubscript(const TessResultIterator* handle) { + return static_cast(handle->SymbolIsSubscript()); +} + +TESS_API BOOL TESS_CALL +TessResultIteratorSymbolIsDropcap(const TessResultIterator* handle) { + return static_cast(handle->SymbolIsDropcap()); +} + +TESS_API void TESS_CALL TessChoiceIteratorDelete(TessChoiceIterator* handle) { + delete handle; +} + +TESS_API BOOL TESS_CALL TessChoiceIteratorNext(TessChoiceIterator* handle) { + return static_cast(handle->Next()); +} + +TESS_API const char* TESS_CALL +TessChoiceIteratorGetUTF8Text(const TessChoiceIterator* handle) { + return handle->GetUTF8Text(); +} + +TESS_API float TESS_CALL +TessChoiceIteratorConfidence(const TessChoiceIterator* handle) { + return handle->Confidence(); +} + +TESS_API ETEXT_DESC* TESS_CALL TessMonitorCreate() { + return new ETEXT_DESC(); +} + +TESS_API void TESS_CALL TessMonitorDelete(ETEXT_DESC* monitor) { + delete monitor; +} + +TESS_API void TESS_CALL TessMonitorSetCancelFunc(ETEXT_DESC* monitor, + TessCancelFunc cancelFunc) { + monitor->cancel = cancelFunc; +} + +TESS_API void TESS_CALL TessMonitorSetCancelThis(ETEXT_DESC* monitor, + void* cancelThis) { + monitor->cancel_this = cancelThis; +} + +TESS_API void* TESS_CALL TessMonitorGetCancelThis(ETEXT_DESC* monitor) { + return monitor->cancel_this; +} + +TESS_API void TESS_CALL +TessMonitorSetProgressFunc(ETEXT_DESC* monitor, TessProgressFunc progressFunc) { + monitor->progress_callback2 = progressFunc; +} + +TESS_API int TESS_CALL TessMonitorGetProgress(ETEXT_DESC* monitor) { + return monitor->progress; +} + +TESS_API void TESS_CALL TessMonitorSetDeadlineMSecs(ETEXT_DESC* monitor, + int deadline) { + monitor->set_deadline_msecs(deadline); +} diff --git 
a/third_party/ocr/tesseract-ocr/src/capi.h b/third_party/ocr/tesseract-ocr/src/capi.h new file mode 100644 index 00000000..8752816a --- /dev/null +++ b/third_party/ocr/tesseract-ocr/src/capi.h @@ -0,0 +1,630 @@ +/////////////////////////////////////////////////////////////////////// +// File: capi.h +// Description: C-API TessBaseAPI +// +// (C) Copyright 2012, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +/////////////////////////////////////////////////////////////////////// + +#ifndef API_CAPI_H_ +#define API_CAPI_H_ + +#if defined(TESSERACT_API_BASEAPI_H_) && !defined(TESS_CAPI_INCLUDE_BASEAPI) +# define TESS_CAPI_INCLUDE_BASEAPI +#endif + +#ifdef TESS_CAPI_INCLUDE_BASEAPI +# include "baseapi.h" +# include "ocrclass.h" +# include "pageiterator.h" +# include "renderer.h" +# include "resultiterator.h" +#else +# include +# include +# include "platform.h" +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef TESS_CALL +# if defined(WIN32) +# define TESS_CALL __cdecl +# else +# define TESS_CALL +# endif +#endif + +#ifndef BOOL +# define BOOL int +# define TRUE 1 +# define FALSE 0 +#endif + +#ifdef TESS_CAPI_INCLUDE_BASEAPI +typedef tesseract::TessResultRenderer TessResultRenderer; +typedef tesseract::TessTextRenderer TessTextRenderer; +typedef tesseract::TessHOcrRenderer TessHOcrRenderer; +typedef tesseract::TessAltoRenderer TessAltoRenderer; +typedef tesseract::TessTsvRenderer TessTsvRenderer; +typedef tesseract::TessPDFRenderer 
TessPDFRenderer; +typedef tesseract::TessUnlvRenderer TessUnlvRenderer; +typedef tesseract::TessBoxTextRenderer TessBoxTextRenderer; +typedef tesseract::TessWordStrBoxRenderer TessWordStrBoxRenderer; +typedef tesseract::TessLSTMBoxRenderer TessLSTMBoxRenderer; +typedef tesseract::TessBaseAPI TessBaseAPI; +typedef tesseract::PageIterator TessPageIterator; +typedef tesseract::ResultIterator TessResultIterator; +typedef tesseract::MutableIterator TessMutableIterator; +typedef tesseract::ChoiceIterator TessChoiceIterator; +typedef tesseract::OcrEngineMode TessOcrEngineMode; +typedef tesseract::PageSegMode TessPageSegMode; +typedef tesseract::ImageThresholder TessImageThresholder; +typedef tesseract::PageIteratorLevel TessPageIteratorLevel; +typedef tesseract::DictFunc TessDictFunc; +typedef tesseract::ProbabilityInContextFunc TessProbabilityInContextFunc; +// typedef tesseract::ParamsModelClassifyFunc TessParamsModelClassifyFunc; +typedef tesseract::FillLatticeFunc TessFillLatticeFunc; +typedef tesseract::Dawg TessDawg; +typedef tesseract::TruthCallback TessTruthCallback; +typedef tesseract::Orientation TessOrientation; +typedef tesseract::ParagraphJustification TessParagraphJustification; +typedef tesseract::WritingDirection TessWritingDirection; +typedef tesseract::TextlineOrder TessTextlineOrder; +typedef PolyBlockType TessPolyBlockType; +#else +typedef struct TessResultRenderer TessResultRenderer; +typedef struct TessTextRenderer TessTextRenderer; +typedef struct TessHOcrRenderer TessHOcrRenderer; +typedef struct TessPDFRenderer TessPDFRenderer; +typedef struct TessUnlvRenderer TessUnlvRenderer; +typedef struct TessBoxTextRenderer TessBoxTextRenderer; +typedef struct TessBaseAPI TessBaseAPI; +typedef struct TessPageIterator TessPageIterator; +typedef struct TessResultIterator TessResultIterator; +typedef struct TessMutableIterator TessMutableIterator; +typedef struct TessChoiceIterator TessChoiceIterator; +typedef enum TessOcrEngineMode { + OEM_TESSERACT_ONLY, + 
OEM_LSTM_ONLY, + OEM_TESSERACT_LSTM_COMBINED, + OEM_DEFAULT +} TessOcrEngineMode; +typedef enum TessPageSegMode { + PSM_OSD_ONLY, + PSM_AUTO_OSD, + PSM_AUTO_ONLY, + PSM_AUTO, + PSM_SINGLE_COLUMN, + PSM_SINGLE_BLOCK_VERT_TEXT, + PSM_SINGLE_BLOCK, + PSM_SINGLE_LINE, + PSM_SINGLE_WORD, + PSM_CIRCLE_WORD, + PSM_SINGLE_CHAR, + PSM_SPARSE_TEXT, + PSM_SPARSE_TEXT_OSD, + PSM_RAW_LINE, + PSM_COUNT +} TessPageSegMode; +typedef enum TessPageIteratorLevel { + RIL_BLOCK, + RIL_PARA, + RIL_TEXTLINE, + RIL_WORD, + RIL_SYMBOL +} TessPageIteratorLevel; +typedef enum TessPolyBlockType { + PT_UNKNOWN, + PT_FLOWING_TEXT, + PT_HEADING_TEXT, + PT_PULLOUT_TEXT, + PT_EQUATION, + PT_INLINE_EQUATION, + PT_TABLE, + PT_VERTICAL_TEXT, + PT_CAPTION_TEXT, + PT_FLOWING_IMAGE, + PT_HEADING_IMAGE, + PT_PULLOUT_IMAGE, + PT_HORZ_LINE, + PT_VERT_LINE, + PT_NOISE, + PT_COUNT +} TessPolyBlockType; +typedef enum TessOrientation { + ORIENTATION_PAGE_UP, + ORIENTATION_PAGE_RIGHT, + ORIENTATION_PAGE_DOWN, + ORIENTATION_PAGE_LEFT +} TessOrientation; +typedef enum TessParagraphJustification { + JUSTIFICATION_UNKNOWN, + JUSTIFICATION_LEFT, + JUSTIFICATION_CENTER, + JUSTIFICATION_RIGHT +} TessParagraphJustification; +typedef enum TessWritingDirection { + WRITING_DIRECTION_LEFT_TO_RIGHT, + WRITING_DIRECTION_RIGHT_TO_LEFT, + WRITING_DIRECTION_TOP_TO_BOTTOM +} TessWritingDirection; +typedef enum TessTextlineOrder { + TEXTLINE_ORDER_LEFT_TO_RIGHT, + TEXTLINE_ORDER_RIGHT_TO_LEFT, + TEXTLINE_ORDER_TOP_TO_BOTTOM +} TessTextlineOrder; +typedef struct ETEXT_DESC ETEXT_DESC; +#endif + +typedef bool (*TessCancelFunc)(void* cancel_this, int words); +typedef bool (*TessProgressFunc)(ETEXT_DESC* ths, int left, int right, int top, + int bottom); + +struct Pix; +struct Boxa; +struct Pixa; + +TESS_API int MyOSD(TessBaseAPI* api); + +/* General free functions */ + +TESS_API const char* TESS_CALL TessVersion(); +TESS_API void TESS_CALL TessDeleteText(const char* text); +TESS_API void TESS_CALL TessDeleteTextArray(char** arr); 
+TESS_API void TESS_CALL TessDeleteIntArray(const int* arr); + +/* Renderer API */ +TESS_API TessResultRenderer* TESS_CALL +TessTextRendererCreate(const char* outputbase); +TESS_API TessResultRenderer* TESS_CALL +TessHOcrRendererCreate(const char* outputbase); +TESS_API TessResultRenderer* TESS_CALL +TessHOcrRendererCreate2(const char* outputbase, BOOL font_info); +TESS_API TessResultRenderer* TESS_CALL +TessAltoRendererCreate(const char* outputbase); +TESS_API TessResultRenderer* TESS_CALL +TessTsvRendererCreate(const char* outputbase); +TESS_API TessResultRenderer* TESS_CALL TessPDFRendererCreate( + const char* outputbase, const char* datadir, BOOL textonly); +TESS_API TessResultRenderer* TESS_CALL +TessUnlvRendererCreate(const char* outputbase); +TESS_API TessResultRenderer* TESS_CALL +TessBoxTextRendererCreate(const char* outputbase); +TESS_API TessResultRenderer* TESS_CALL +TessLSTMBoxRendererCreate(const char* outputbase); +TESS_API TessResultRenderer* TESS_CALL +TessWordStrBoxRendererCreate(const char* outputbase); + +TESS_API void TESS_CALL TessDeleteResultRenderer(TessResultRenderer* renderer); +TESS_API void TESS_CALL TessResultRendererInsert(TessResultRenderer* renderer, + TessResultRenderer* next); +TESS_API TessResultRenderer* TESS_CALL +TessResultRendererNext(TessResultRenderer* renderer); +TESS_API BOOL TESS_CALL TessResultRendererBeginDocument( + TessResultRenderer* renderer, const char* title); +TESS_API BOOL TESS_CALL TessResultRendererAddImage(TessResultRenderer* renderer, + TessBaseAPI* api); +TESS_API BOOL TESS_CALL +TessResultRendererEndDocument(TessResultRenderer* renderer); + +TESS_API const char* TESS_CALL +TessResultRendererExtention(TessResultRenderer* renderer); +TESS_API const char* TESS_CALL +TessResultRendererTitle(TessResultRenderer* renderer); +TESS_API int TESS_CALL TessResultRendererImageNum(TessResultRenderer* renderer); + +/* Base API */ + +TESS_API TessBaseAPI* TESS_CALL TessBaseAPICreate(); +TESS_API void TESS_CALL 
TessBaseAPIDelete(TessBaseAPI* handle); + +TESS_API size_t TESS_CALL TessBaseAPIGetOpenCLDevice(TessBaseAPI* handle, + void** device); + +TESS_API void TESS_CALL TessBaseAPISetInputName(TessBaseAPI* handle, + const char* name); +TESS_API const char* TESS_CALL TessBaseAPIGetInputName(TessBaseAPI* handle); + +TESS_API void TESS_CALL TessBaseAPISetInputImage(TessBaseAPI* handle, + struct Pix* pix); +TESS_API struct Pix* TESS_CALL TessBaseAPIGetInputImage(TessBaseAPI* handle); + +TESS_API int TESS_CALL TessBaseAPIGetSourceYResolution(TessBaseAPI* handle); +TESS_API const char* TESS_CALL TessBaseAPIGetDatapath(TessBaseAPI* handle); + +TESS_API void TESS_CALL TessBaseAPISetOutputName(TessBaseAPI* handle, + const char* name); + +TESS_API BOOL TESS_CALL TessBaseAPISetVariable(TessBaseAPI* handle, + const char* name, + const char* value); +TESS_API BOOL TESS_CALL TessBaseAPISetDebugVariable(TessBaseAPI* handle, + const char* name, + const char* value); + +TESS_API BOOL TESS_CALL TessBaseAPIGetIntVariable(const TessBaseAPI* handle, + const char* name, int* value); +TESS_API BOOL TESS_CALL TessBaseAPIGetBoolVariable(const TessBaseAPI* handle, + const char* name, + BOOL* value); +TESS_API BOOL TESS_CALL TessBaseAPIGetDoubleVariable(const TessBaseAPI* handle, + const char* name, + double* value); +TESS_API const char* TESS_CALL +TessBaseAPIGetStringVariable(const TessBaseAPI* handle, const char* name); + +TESS_API void TESS_CALL TessBaseAPIPrintVariables(const TessBaseAPI* handle, + FILE* fp); +TESS_API BOOL TESS_CALL TessBaseAPIPrintVariablesToFile( + const TessBaseAPI* handle, const char* filename); + +#ifdef TESS_CAPI_INCLUDE_BASEAPI + +TESS_API BOOL TESS_CALL TessBaseAPIGetVariableAsString(TessBaseAPI* handle, + const char* name, + STRING* val); + +TESS_API int TESS_CALL TessBaseAPIInit( + TessBaseAPI* handle, const char* datapath, const char* language, + TessOcrEngineMode mode, char** configs, int configs_size, + const STRING* vars_vec, size_t vars_vec_size, const STRING* 
vars_values, + size_t vars_values_size, BOOL set_only_init_params); + +#endif // def TESS_CAPI_INCLUDE_BASEAPI + +TESS_API int TESS_CALL TessBaseAPIInit1(TessBaseAPI* handle, + const char* datapath, + const char* language, + TessOcrEngineMode oem, char** configs, + int configs_size); +TESS_API int TESS_CALL TessBaseAPIInit2(TessBaseAPI* handle, + const char* datapath, + const char* language, + TessOcrEngineMode oem); +TESS_API int TESS_CALL TessBaseAPIInit3(TessBaseAPI* handle, + const char* datapath, + const char* language); + +TESS_API int TESS_CALL TessBaseAPIInit4( + TessBaseAPI* handle, const char* datapath, const char* language, + TessOcrEngineMode mode, char** configs, int configs_size, char** vars_vec, + char** vars_values, size_t vars_vec_size, BOOL set_only_non_debug_params); + +TESS_API const char* TESS_CALL +TessBaseAPIGetInitLanguagesAsString(const TessBaseAPI* handle); +TESS_API char** TESS_CALL +TessBaseAPIGetLoadedLanguagesAsVector(const TessBaseAPI* handle); +TESS_API char** TESS_CALL +TessBaseAPIGetAvailableLanguagesAsVector(const TessBaseAPI* handle); + +TESS_API int TESS_CALL TessBaseAPIInitLangMod(TessBaseAPI* handle, + const char* datapath, + const char* language); +TESS_API void TESS_CALL TessBaseAPIInitForAnalysePage(TessBaseAPI* handle); + +TESS_API void TESS_CALL TessBaseAPIReadConfigFile(TessBaseAPI* handle, + const char* filename); +TESS_API void TESS_CALL TessBaseAPIReadDebugConfigFile(TessBaseAPI* handle, + const char* filename); + +TESS_API void TESS_CALL TessBaseAPISetPageSegMode(TessBaseAPI* handle, + TessPageSegMode mode); +TESS_API TessPageSegMode TESS_CALL +TessBaseAPIGetPageSegMode(const TessBaseAPI* handle); + +TESS_API char* TESS_CALL TessBaseAPIRect(TessBaseAPI* handle, + const unsigned char* imagedata, + int bytes_per_pixel, + int bytes_per_line, int left, int top, + int width, int height); + +TESS_API void TESS_CALL TessBaseAPIClearAdaptiveClassifier(TessBaseAPI* handle); + +TESS_API void TESS_CALL 
TessBaseAPISetImage(TessBaseAPI* handle, + const unsigned char* imagedata, + int width, int height, + int bytes_per_pixel, + int bytes_per_line); +TESS_API void TESS_CALL TessBaseAPISetImage2(TessBaseAPI* handle, + struct Pix* pix); + +TESS_API void TESS_CALL TessBaseAPISetSourceResolution(TessBaseAPI* handle, + int ppi); + +TESS_API void TESS_CALL TessBaseAPISetRectangle(TessBaseAPI* handle, int left, + int top, int width, int height); + +#ifdef TESS_CAPI_INCLUDE_BASEAPI +TESS_API void TESS_CALL TessBaseAPISetThresholder( + TessBaseAPI* handle, TessImageThresholder* thresholder); +#endif + +TESS_API struct Pix* TESS_CALL +TessBaseAPIGetThresholdedImage(TessBaseAPI* handle); +TESS_API struct Boxa* TESS_CALL TessBaseAPIGetRegions(TessBaseAPI* handle, + struct Pixa** pixa); +TESS_API struct Boxa* TESS_CALL TessBaseAPIGetTextlines(TessBaseAPI* handle, + struct Pixa** pixa, + int** blockids); +TESS_API struct Boxa* TESS_CALL +TessBaseAPIGetTextlines1(TessBaseAPI* handle, BOOL raw_image, int raw_padding, + struct Pixa** pixa, int** blockids, int** paraids); +TESS_API struct Boxa* TESS_CALL TessBaseAPIGetStrips(TessBaseAPI* handle, + struct Pixa** pixa, + int** blockids); +TESS_API struct Boxa* TESS_CALL TessBaseAPIGetWords(TessBaseAPI* handle, + struct Pixa** pixa); +TESS_API struct Boxa* TESS_CALL +TessBaseAPIGetConnectedComponents(TessBaseAPI* handle, struct Pixa** cc); +TESS_API struct Boxa* TESS_CALL TessBaseAPIGetComponentImages( + TessBaseAPI* handle, TessPageIteratorLevel level, BOOL text_only, + struct Pixa** pixa, int** blockids); +TESS_API struct Boxa* TESS_CALL TessBaseAPIGetComponentImages1( + TessBaseAPI* handle, TessPageIteratorLevel level, BOOL text_only, + BOOL raw_image, int raw_padding, struct Pixa** pixa, int** blockids, + int** paraids); + +TESS_API int TESS_CALL +TessBaseAPIGetThresholdedImageScaleFactor(const TessBaseAPI* handle); + +TESS_API TessPageIterator* TESS_CALL +TessBaseAPIAnalyseLayout(TessBaseAPI* handle); + +TESS_API int TESS_CALL 
TessBaseAPIRecognize(TessBaseAPI* handle, + ETEXT_DESC* monitor); + +#ifndef DISABLED_LEGACY_ENGINE +TESS_API int TESS_CALL TessBaseAPIRecognizeForChopTest(TessBaseAPI* handle, + ETEXT_DESC* monitor); +#endif + +TESS_API BOOL TESS_CALL TessBaseAPIProcessPages(TessBaseAPI* handle, + const char* filename, + const char* retry_config, + int timeout_millisec, + TessResultRenderer* renderer); +TESS_API BOOL TESS_CALL TessBaseAPIProcessPage(TessBaseAPI* handle, + struct Pix* pix, int page_index, + const char* filename, + const char* retry_config, + int timeout_millisec, + TessResultRenderer* renderer); + +TESS_API TessResultIterator* TESS_CALL +TessBaseAPIGetIterator(TessBaseAPI* handle); +TESS_API TessMutableIterator* TESS_CALL +TessBaseAPIGetMutableIterator(TessBaseAPI* handle); + +TESS_API char* TESS_CALL TessBaseAPIGetUTF8Text(TessBaseAPI* handle); +TESS_API char* TESS_CALL TessBaseAPIGetHOCRText(TessBaseAPI* handle, + int page_number); + +TESS_API char* TESS_CALL TessBaseAPIGetAltoText(TessBaseAPI* handle, + int page_number); +TESS_API char* TESS_CALL TessBaseAPIGetTsvText(TessBaseAPI* handle, + int page_number); + +TESS_API char* TESS_CALL TessBaseAPIGetBoxText(TessBaseAPI* handle, + int page_number); +TESS_API char* TESS_CALL TessBaseAPIGetLSTMBoxText(TessBaseAPI* handle, + int page_number); +TESS_API char* TESS_CALL TessBaseAPIGetWordStrBoxText(TessBaseAPI* handle, + int page_number); + +TESS_API char* TESS_CALL TessBaseAPIGetUNLVText(TessBaseAPI* handle); +TESS_API int TESS_CALL TessBaseAPIMeanTextConf(TessBaseAPI* handle); + +TESS_API int* TESS_CALL TessBaseAPIAllWordConfidences(TessBaseAPI* handle); + +#ifndef DISABLED_LEGACY_ENGINE +TESS_API BOOL TESS_CALL TessBaseAPIAdaptToWordStr(TessBaseAPI* handle, + TessPageSegMode mode, + const char* wordstr); +#endif // ndef DISABLED_LEGACY_ENGINE + +TESS_API void TESS_CALL TessBaseAPIClear(TessBaseAPI* handle); +TESS_API void TESS_CALL TessBaseAPIEnd(TessBaseAPI* handle); + +TESS_API int TESS_CALL 
TessBaseAPIIsValidWord(TessBaseAPI* handle, + const char* word); +TESS_API BOOL TESS_CALL TessBaseAPIGetTextDirection(TessBaseAPI* handle, + int* out_offset, + float* out_slope); + +#ifdef TESS_CAPI_INCLUDE_BASEAPI + +TESS_API void TESS_CALL TessBaseAPISetDictFunc(TessBaseAPI* handle, + TessDictFunc f); + +TESS_API void TESS_CALL TessBaseAPIClearPersistentCache(TessBaseAPI* handle); + +TESS_API void TESS_CALL TessBaseAPISetProbabilityInContextFunc( + TessBaseAPI* handle, TessProbabilityInContextFunc f); + +// Call TessDeleteText(*best_script_name) to free memory allocated by this +// function +TESS_API BOOL TESS_CALL TessBaseAPIDetectOrientationScript( + TessBaseAPI* handle, int* orient_deg, float* orient_conf, + const char** script_name, float* script_conf); + +#endif // def TESS_CAPI_INCLUDE_BASEAPI + +TESS_API const char* TESS_CALL TessBaseAPIGetUnichar(TessBaseAPI* handle, + int unichar_id); + +TESS_API void TESS_CALL TessBaseAPISetMinOrientationMargin(TessBaseAPI* handle, + double margin); + +#ifdef TESS_CAPI_INCLUDE_BASEAPI + +TESS_API const TessDawg* TESS_CALL TessBaseAPIGetDawg(const TessBaseAPI* handle, + int i); + +TESS_API int TESS_CALL TessBaseAPINumDawgs(const TessBaseAPI* handle); + +TESS_API TessOcrEngineMode TESS_CALL TessBaseAPIOem(const TessBaseAPI* handle); + +TESS_API void TESS_CALL TessBaseAPIInitTruthCallback(TessBaseAPI* handle, + TessTruthCallback* cb); + +TESS_API void TESS_CALL TessBaseGetBlockTextOrientations( + TessBaseAPI* handle, int** block_orientation, bool** vertical_writing); + +#endif + +/* Page iterator */ + +TESS_API void TESS_CALL TessPageIteratorDelete(TessPageIterator* handle); + +TESS_API TessPageIterator* TESS_CALL +TessPageIteratorCopy(const TessPageIterator* handle); + +TESS_API void TESS_CALL TessPageIteratorBegin(TessPageIterator* handle); + +TESS_API BOOL TESS_CALL TessPageIteratorNext(TessPageIterator* handle, + TessPageIteratorLevel level); + +TESS_API BOOL TESS_CALL TessPageIteratorIsAtBeginningOf( + const 
TessPageIterator* handle, TessPageIteratorLevel level); + +TESS_API BOOL TESS_CALL TessPageIteratorIsAtFinalElement( + const TessPageIterator* handle, TessPageIteratorLevel level, + TessPageIteratorLevel element); + +TESS_API BOOL TESS_CALL TessPageIteratorBoundingBox( + const TessPageIterator* handle, TessPageIteratorLevel level, int* left, + int* top, int* right, int* bottom); + +TESS_API TessPolyBlockType TESS_CALL +TessPageIteratorBlockType(const TessPageIterator* handle); + +TESS_API struct Pix* TESS_CALL TessPageIteratorGetBinaryImage( + const TessPageIterator* handle, TessPageIteratorLevel level); + +TESS_API struct Pix* TESS_CALL TessPageIteratorGetImage( + const TessPageIterator* handle, TessPageIteratorLevel level, int padding, + struct Pix* original_image, int* left, int* top); + +TESS_API BOOL TESS_CALL TessPageIteratorBaseline(const TessPageIterator* handle, + TessPageIteratorLevel level, + int* x1, int* y1, int* x2, + int* y2); + +TESS_API void TESS_CALL TessPageIteratorOrientation( + TessPageIterator* handle, TessOrientation* orientation, + TessWritingDirection* writing_direction, TessTextlineOrder* textline_order, + float* deskew_angle); + +TESS_API void TESS_CALL TessPageIteratorParagraphInfo( + TessPageIterator* handle, TessParagraphJustification* justification, + BOOL* is_list_item, BOOL* is_crown, int* first_line_indent); + +/* Result iterator */ + +TESS_API void TESS_CALL TessResultIteratorDelete(TessResultIterator* handle); +TESS_API TessResultIterator* TESS_CALL +TessResultIteratorCopy(const TessResultIterator* handle); +TESS_API TessPageIterator* TESS_CALL +TessResultIteratorGetPageIterator(TessResultIterator* handle); +TESS_API const TessPageIterator* TESS_CALL +TessResultIteratorGetPageIteratorConst(const TessResultIterator* handle); +TESS_API TessChoiceIterator* TESS_CALL +TessResultIteratorGetChoiceIterator(const TessResultIterator* handle); + +TESS_API BOOL TESS_CALL TessResultIteratorNext(TessResultIterator* handle, + 
TessPageIteratorLevel level); +TESS_API char* TESS_CALL TessResultIteratorGetUTF8Text( + const TessResultIterator* handle, TessPageIteratorLevel level); +TESS_API float TESS_CALL TessResultIteratorConfidence( + const TessResultIterator* handle, TessPageIteratorLevel level); +TESS_API const char* TESS_CALL +TessResultIteratorWordRecognitionLanguage(const TessResultIterator* handle); +TESS_API const char* TESS_CALL TessResultIteratorWordFontAttributes( + const TessResultIterator* handle, BOOL* is_bold, BOOL* is_italic, + BOOL* is_underlined, BOOL* is_monospace, BOOL* is_serif, BOOL* is_smallcaps, + int* pointsize, int* font_id); + +TESS_API BOOL TESS_CALL +TessResultIteratorWordIsFromDictionary(const TessResultIterator* handle); +TESS_API BOOL TESS_CALL +TessResultIteratorWordIsNumeric(const TessResultIterator* handle); +TESS_API BOOL TESS_CALL +TessResultIteratorSymbolIsSuperscript(const TessResultIterator* handle); +TESS_API BOOL TESS_CALL +TessResultIteratorSymbolIsSubscript(const TessResultIterator* handle); +TESS_API BOOL TESS_CALL +TessResultIteratorSymbolIsDropcap(const TessResultIterator* handle); + +TESS_API void TESS_CALL TessChoiceIteratorDelete(TessChoiceIterator* handle); +TESS_API BOOL TESS_CALL TessChoiceIteratorNext(TessChoiceIterator* handle); +TESS_API const char* TESS_CALL +TessChoiceIteratorGetUTF8Text(const TessChoiceIterator* handle); +TESS_API float TESS_CALL +TessChoiceIteratorConfidence(const TessChoiceIterator* handle); + +/* Progress monitor */ + +TESS_API ETEXT_DESC* TESS_CALL TessMonitorCreate(); +TESS_API void TESS_CALL TessMonitorDelete(ETEXT_DESC* monitor); +TESS_API void TESS_CALL TessMonitorSetCancelFunc(ETEXT_DESC* monitor, + TessCancelFunc cancelFunc); +TESS_API void TESS_CALL TessMonitorSetCancelThis(ETEXT_DESC* monitor, + void* cancelThis); +TESS_API void* TESS_CALL TessMonitorGetCancelThis(ETEXT_DESC* monitor); +TESS_API void TESS_CALL +TessMonitorSetProgressFunc(ETEXT_DESC* monitor, TessProgressFunc progressFunc); +TESS_API int 
TESS_CALL TessMonitorGetProgress(ETEXT_DESC* monitor); +TESS_API void TESS_CALL TessMonitorSetDeadlineMSecs(ETEXT_DESC* monitor, + int deadline); + +#ifndef DISABLED_LEGACY_ENGINE + +# ifdef TESS_CAPI_INCLUDE_BASEAPI +TESS_API void TESS_CALL TessBaseAPISetFillLatticeFunc(TessBaseAPI* handle, + TessFillLatticeFunc f); + +TESS_API void TESS_CALL TessBaseAPIGetFeaturesForBlob( + TessBaseAPI* handle, TBLOB* blob, INT_FEATURE_STRUCT* int_features, + int* num_features, int* FeatureOutlineIndex); + +TESS_API ROW* TESS_CALL TessFindRowForBox(BLOCK_LIST* blocks, int left, int top, + int right, int bottom); + +TESS_API void TESS_CALL TessBaseAPIRunAdaptiveClassifier( + TessBaseAPI* handle, TBLOB* blob, int num_max_matches, int* unichar_ids, + float* ratings, int* num_matches_returned); + +TESS_API ROW* TESS_CALL TessMakeTessOCRRow(float baseline, float xheight, + float descender, float ascender); + +TESS_API TBLOB* TESS_CALL TessMakeTBLOB(Pix* pix); + +TESS_API void TESS_CALL TessNormalizeTBLOB(TBLOB* tblob, ROW* row, + BOOL numeric_mode); + +TESS_API BLOCK_LIST* TESS_CALL +TessBaseAPIFindLinesCreateBlockList(TessBaseAPI* handle); + +TESS_API void TESS_CALL TessDeleteBlockList(BLOCK_LIST* block_list); + +# endif // def TESS_CAPI_INCLUDE_BASEAPI + +#endif // ndef DISABLED_LEGACY_ENGINE + +#ifdef __cplusplus +} +#endif + +#endif // API_CAPI_H_ diff --git a/third_party/ocr/tesseract-ocr/windows/include/tesseract/baseapi.h b/third_party/ocr/tesseract-ocr/windows/include/tesseract/baseapi.h index 3724dd92..fe12351b 100644 --- a/third_party/ocr/tesseract-ocr/windows/include/tesseract/baseapi.h +++ b/third_party/ocr/tesseract-ocr/windows/include/tesseract/baseapi.h @@ -93,6 +93,8 @@ class TESS_API TessBaseAPI { TessBaseAPI(); virtual ~TessBaseAPI(); + int MyOSD(); + /** * Returns the version identifier as a static string. Do not delete. 
*/ diff --git a/third_party/ocr/tesseract-ocr/windows/include/tesseract/capi.h b/third_party/ocr/tesseract-ocr/windows/include/tesseract/capi.h index 7ed64ef4..8752816a 100644 --- a/third_party/ocr/tesseract-ocr/windows/include/tesseract/capi.h +++ b/third_party/ocr/tesseract-ocr/windows/include/tesseract/capi.h @@ -176,6 +176,8 @@ struct Pix; struct Boxa; struct Pixa; +TESS_API int MyOSD(TessBaseAPI* api); + /* General free functions */ TESS_API const char* TESS_CALL TessVersion(); diff --git a/third_party/ocr/tesseract-ocr/windows/lib/x64/tesseract41.lib b/third_party/ocr/tesseract-ocr/windows/lib/x64/tesseract41.lib index 4ea7b753..1d15d743 100644 Binary files a/third_party/ocr/tesseract-ocr/windows/lib/x64/tesseract41.lib and b/third_party/ocr/tesseract-ocr/windows/lib/x64/tesseract41.lib differ diff --git a/third_party/ocr/tesseract-ocr/windows/lib/x64/tesseract41d.lib b/third_party/ocr/tesseract-ocr/windows/lib/x64/tesseract41d.lib index 8b04c12f..2f30a783 100644 Binary files a/third_party/ocr/tesseract-ocr/windows/lib/x64/tesseract41d.lib and b/third_party/ocr/tesseract-ocr/windows/lib/x64/tesseract41d.lib differ diff --git a/third_party/ocr/tesseract-ocr/windows/lib/x86/tesseract41.lib b/third_party/ocr/tesseract-ocr/windows/lib/x86/tesseract41.lib index af246396..f2c49d6f 100644 Binary files a/third_party/ocr/tesseract-ocr/windows/lib/x86/tesseract41.lib and b/third_party/ocr/tesseract-ocr/windows/lib/x86/tesseract41.lib differ diff --git a/third_party/ocr/tesseract-ocr/windows/lib/x86/tesseract41d.lib b/third_party/ocr/tesseract-ocr/windows/lib/x86/tesseract41d.lib index 74cecdeb..ce9be4a0 100644 Binary files a/third_party/ocr/tesseract-ocr/windows/lib/x86/tesseract41d.lib and b/third_party/ocr/tesseract-ocr/windows/lib/x86/tesseract41d.lib differ