linux下暂时禁用tesseract-ocr

2022-10-22 15:15:41 +08:00 · 2022-10-22 15:15:41 +08:00 · 6f9d5de778
parent 080915ddd2
commit 6f9d5de778
86 changed files with 17 additions and 22959 deletions
--- a/build/linux/HGImgProc/HGImgProc.cbp
+++ b/build/linux/HGImgProc/HGImgProc.cbp
@ -29,8 +29,6 @@
 					<Add option="../../../third_party/opencv/uos/amd64/lib/libittnotify.a" />
 					<Add option="../../../third_party/opencv/uos/amd64/lib/libzlib.a" />
 					<Add option="../../../third_party/freetype/uos/amd64/lib/libfreetype.a" />
-					<Add option="../../../third_party/ocr/tesseract-ocr/uos/amd64/lib/libtesseract.a" />
-					<Add option="../../../third_party/leptonica/uos/amd64/lib/libleptonica.a" />
 					<Add option="-L../HGBase/bin/uos_x86_64_Debug -lHGBase" />
 					<Add option="-L../HGImgFmt/bin/uos_x86_64_Debug -lHGImgFmt" />
 					<Add option="-ldl" />
@ -61,8 +59,6 @@
 					<Add option="../../../third_party/opencv/uos/amd64/lib/libittnotify.a" />
 					<Add option="../../../third_party/opencv/uos/amd64/lib/libzlib.a" />
 					<Add option="../../../third_party/freetype/uos/amd64/lib/libfreetype.a" />
-					<Add option="../../../third_party/ocr/tesseract-ocr/uos/amd64/lib/libtesseract.a" />
-					<Add option="../../../third_party/leptonica/uos/amd64/lib/libleptonica.a" />
 					<Add option="-L../../../../release/uos/x86_64 -lHGBase -lHGImgFmt" />
 					<Add option="-ldl" />
 					<Add option="-lpthread" />
@ -495,8 +491,6 @@
 		<Unit filename="../../../modules/imgproc/HGOCRHanvon.hpp" />
 		<Unit filename="../../../modules/imgproc/HGOCRRetImpl.cpp" />
 		<Unit filename="../../../modules/imgproc/HGOCRRetImpl.hpp" />
-		<Unit filename="../../../modules/imgproc/HGOCRTesseract.cpp" />
-		<Unit filename="../../../modules/imgproc/HGOCRTesseract.hpp" />
 		<Unit filename="../../../modules/imgproc/ImageProcess/ImageApply.cpp" />
 		<Unit filename="../../../modules/imgproc/ImageProcess/ImageApply.h" />
 		<Unit filename="../../../modules/imgproc/ImageProcess/ImageApplyAdjustColors.cpp" />
--- a/modules/imgproc/HGOCR.cpp
+++ b/modules/imgproc/HGOCR.cpp
@ -1,7 +1,9 @@
 #include "HGOCR.h"
 #include "HGOCRBase.hpp"
 #include "HGOCRHanvon.hpp"
+#if defined(HG_CMP_MSC)
 #include "HGOCRTesseract.hpp"
+#endif
 #include "HGOCRRetImpl.hpp"

 HGResult HGAPI HGImgProc_CreateOCRMgr(HGUInt algo, HGOCRMgr* ocrMgr)
@ -13,6 +15,7 @@ HGResult HGAPI HGImgProc_CreateOCRMgr(HGUInt algo, HGOCRMgr* ocrMgr)

 	if (HGIMGPROC_OCRALGO_DEFAULT == algo)
 	{
+#if defined(HG_CMP_MSC)
 		HGOCRBase* ocrMgrImpl = new HGOCRHanvon;
 		HGResult ret = ocrMgrImpl->Init();
 		if (HGBASE_ERR_OK != ret)
@ -26,6 +29,15 @@ HGResult HGAPI HGImgProc_CreateOCRMgr(HGUInt algo, HGOCRMgr* ocrMgr)
 				return ret;
 			}
 		}
+#else
+		HGOCRBase* ocrMgrImpl = new HGOCRHanvon;
+		HGResult ret = ocrMgrImpl->Init();
+		if (HGBASE_ERR_OK != ret)
+		{
+			delete ocrMgrImpl;
+			return ret;
+		}
+#endif

 		*ocrMgr = (HGOCRMgr)ocrMgrImpl;
 		return HGBASE_ERR_OK;
@ -45,6 +57,7 @@ HGResult HGAPI HGImgProc_CreateOCRMgr(HGUInt algo, HGOCRMgr* ocrMgr)
 	}
 	else if (HGIMGPROC_OCRALGO_TESSERACT == algo)
 	{
+#if defined(HG_CMP_MSC)
 		HGOCRBase* ocrMgrImpl = new HGOCRTesseract;
 		HGResult ret = ocrMgrImpl->Init();
 		if (HGBASE_ERR_OK != ret)
@ -55,6 +68,10 @@ HGResult HGAPI HGImgProc_CreateOCRMgr(HGUInt algo, HGOCRMgr* ocrMgr)

 		*ocrMgr = (HGOCRMgr)ocrMgrImpl;
 		return HGBASE_ERR_OK;
+#else
+		return HGBASE_ERR_INVALIDARG;
+#endif
+
 	}

 	return HGBASE_ERR_INVALIDARG;
--- a/third_party/ocr/tesseract-ocr/kylin/aarch64/include/tesseract/baseapi.h
+++ b/third_party/ocr/tesseract-ocr/kylin/aarch64/include/tesseract/baseapi.h
@ -1,812 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// File:        baseapi.h
-// Description: Simple API for calling tesseract.
-// Author:      Ray Smith
-//
-// (C) Copyright 2006, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef TESSERACT_API_BASEAPI_H_
-#define TESSERACT_API_BASEAPI_H_
-
-#ifdef HAVE_CONFIG_H
-#  include "config_auto.h" // DISABLED_LEGACY_ENGINE
-#endif
-
-#include "export.h"
-#include "pageiterator.h"
-#include "publictypes.h"
-#include "resultiterator.h"
-#include "unichar.h"
-
-#include "version.h"
-
-#include <cstdio>
-#include <vector> // for std::vector
-
-struct Pix;
-struct Pixa;
-struct Boxa;
-
-namespace tesseract {
-
-class PAGE_RES;
-class ParagraphModel;
-class BLOCK_LIST;
-class ETEXT_DESC;
-struct OSResults;
-class UNICHARSET;
-
-class Dawg;
-class Dict;
-class EquationDetect;
-class PageIterator;
-class ImageThresholder;
-class LTRResultIterator;
-class ResultIterator;
-class MutableIterator;
-class TessResultRenderer;
-class Tesseract;
-
-// Function to read a std::vector<char> from a whole file.
-// Returns false on failure.
-using FileReader = bool (*)(const char *filename, std::vector<char> *data);
-
-using DictFunc = int (Dict::*)(void *, const UNICHARSET &, UNICHAR_ID,
-                               bool) const;
-using ProbabilityInContextFunc = double (Dict::*)(const char *, const char *,
-                                                  int, const char *, int);
-
-/**
- * Base class for all tesseract APIs.
- * Specific classes can add ability to work on different inputs or produce
- * different outputs.
- * This class is mostly an interface layer on top of the Tesseract instance
- * class to hide the data types so that users of this class don't have to
- * include any other Tesseract headers.
- */
-class TESS_API TessBaseAPI {
-public:
-  TessBaseAPI();
-  virtual ~TessBaseAPI();
-  // Copy constructor and assignment operator are currently unsupported.
-  TessBaseAPI(TessBaseAPI const &) = delete;
-  TessBaseAPI &operator=(TessBaseAPI const &) = delete;
-
-  /**
-   * Returns the version identifier as a static string. Do not delete.
-   */
-  static const char *Version();
-
-  /**
-   * If compiled with OpenCL AND an available OpenCL
-   * device is deemed faster than serial code, then
-   * "device" is populated with the cl_device_id
-   * and returns sizeof(cl_device_id)
-   * otherwise *device=nullptr and returns 0.
-   */
-  static size_t getOpenCLDevice(void **device);
-
-  /**
-   * Set the name of the input file. Needed for training and
-   * reading a UNLV zone file, and for searchable PDF output.
-   */
-  void SetInputName(const char *name);
-  /**
-   * These functions are required for searchable PDF output.
-   * We need our hands on the input file so that we can include
-   * it in the PDF without transcoding. If that is not possible,
-   * we need the original image. Finally, resolution metadata
-   * is stored in the PDF so we need that as well.
-   */
-  const char *GetInputName();
-  // Takes ownership of the input pix.
-  void SetInputImage(Pix *pix);
-  Pix *GetInputImage();
-  int GetSourceYResolution();
-  const char *GetDatapath();
-
-  /** Set the name of the bonus output files. Needed only for debugging. */
-  void SetOutputName(const char *name);
-
-  /**
-   * Set the value of an internal "parameter."
-   * Supply the name of the parameter and the value as a string, just as
-   * you would in a config file.
-   * Returns false if the name lookup failed.
-   * Eg SetVariable("tessedit_char_blacklist", "xyz"); to ignore x, y and z.
-   * Or SetVariable("classify_bln_numeric_mode", "1"); to set numeric-only mode.
-   * SetVariable may be used before Init, but settings will revert to
-   * defaults on End().
-   *
-   * Note: Must be called after Init(). Only works for non-init variables
-   * (init variables should be passed to Init()).
-   */
-  bool SetVariable(const char *name, const char *value);
-  bool SetDebugVariable(const char *name, const char *value);
-
-  /**
-   * Returns true if the parameter was found among Tesseract parameters.
-   * Fills in value with the value of the parameter.
-   */
-  bool GetIntVariable(const char *name, int *value) const;
-  bool GetBoolVariable(const char *name, bool *value) const;
-  bool GetDoubleVariable(const char *name, double *value) const;
-
-  /**
-   * Returns the pointer to the string that represents the value of the
-   * parameter if it was found among Tesseract parameters.
-   */
-  const char *GetStringVariable(const char *name) const;
-
-#ifndef DISABLED_LEGACY_ENGINE
-
-  /**
-   * Print Tesseract fonts table to the given file.
-   */
-  void PrintFontsTable(FILE *fp) const;
-
-#endif
-
-  /**
-   * Print Tesseract parameters to the given file.
-   */
-  void PrintVariables(FILE *fp) const;
-
-  /**
-   * Get value of named variable as a string, if it exists.
-   */
-  bool GetVariableAsString(const char *name, std::string *val) const;
-
-  /**
-   * Instances are now mostly thread-safe and totally independent,
-   * but some global parameters remain. Basically it is safe to use multiple
-   * TessBaseAPIs in different threads in parallel, UNLESS:
-   * you use SetVariable on some of the Params in classify and textord.
-   * If you do, then the effect will be to change it for all your instances.
-   *
-   * Start tesseract. Returns zero on success and -1 on failure.
-   * NOTE that the only members that may be called before Init are those
-   * listed above here in the class definition.
-   *
-   * The datapath must be the name of the tessdata directory.
-   * The language is (usually) an ISO 639-3 string or nullptr will default to
-   * eng. It is entirely safe (and eventually will be efficient too) to call
-   * Init multiple times on the same instance to change language, or just
-   * to reset the classifier.
-   * The language may be a string of the form [~]<lang>[+[~]<lang>]* indicating
-   * that multiple languages are to be loaded. Eg hin+eng will load Hindi and
-   * English. Languages may specify internally that they want to be loaded
-   * with one or more other languages, so the ~ sign is available to override
-   * that. Eg if hin were set to load eng by default, then hin+~eng would force
-   * loading only hin. The number of loaded languages is limited only by
-   * memory, with the caveat that loading additional languages will impact
-   * both speed and accuracy, as there is more work to do to decide on the
-   * applicable language, and there is more chance of hallucinating incorrect
-   * words.
-   * WARNING: On changing languages, all Tesseract parameters are reset
-   * back to their default values. (Which may vary between languages.)
-   * If you have a rare need to set a Variable that controls
-   * initialization for a second call to Init you should explicitly
-   * call End() and then use SetVariable before Init. This is only a very
-   * rare use case, since there are very few uses that require any parameters
-   * to be set before Init.
-   *
-   * If set_only_non_debug_params is true, only params that do not contain
-   * "debug" in the name will be set.
-   */
-  int Init(const char *datapath, const char *language, OcrEngineMode mode,
-           char **configs, int configs_size,
-           const std::vector<std::string> *vars_vec,
-           const std::vector<std::string> *vars_values,
-           bool set_only_non_debug_params);
-  int Init(const char *datapath, const char *language, OcrEngineMode oem) {
-    return Init(datapath, language, oem, nullptr, 0, nullptr, nullptr, false);
-  }
-  int Init(const char *datapath, const char *language) {
-    return Init(datapath, language, OEM_DEFAULT, nullptr, 0, nullptr, nullptr,
-                false);
-  }
-  // In-memory version reads the traineddata file directly from the given
-  // data[data_size] array, and/or reads data via a FileReader.
-  int Init(const char *data, int data_size, const char *language,
-           OcrEngineMode mode, char **configs, int configs_size,
-           const std::vector<std::string> *vars_vec,
-           const std::vector<std::string> *vars_values,
-           bool set_only_non_debug_params, FileReader reader);
-
-  /**
-   * Returns the languages string used in the last valid initialization.
-   * If the last initialization specified "deu+hin" then that will be
-   * returned. If hin loaded eng automatically as well, then that will
-   * not be included in this list. To find the languages actually
-   * loaded use GetLoadedLanguagesAsVector.
-   * The returned string should NOT be deleted.
-   */
-  const char *GetInitLanguagesAsString() const;
-
-  /**
-   * Returns the loaded languages in the vector of std::string.
-   * Includes all languages loaded by the last Init, including those loaded
-   * as dependencies of other loaded languages.
-   */
-  void GetLoadedLanguagesAsVector(std::vector<std::string> *langs) const;
-
-  /**
-   * Returns the available languages in the sorted vector of std::string.
-   */
-  void GetAvailableLanguagesAsVector(std::vector<std::string> *langs) const;
-
-  /**
-   * Init only for page layout analysis. Use only for calls to SetImage and
-   * AnalysePage. Calls that attempt recognition will generate an error.
-   */
-  void InitForAnalysePage();
-
-  /**
-   * Read a "config" file containing a set of param, value pairs.
-   * Searches the standard places: tessdata/configs, tessdata/tessconfigs
-   * and also accepts a relative or absolute path name.
-   * Note: only non-init params will be set (init params are set by Init()).
-   */
-  void ReadConfigFile(const char *filename);
-  /** Same as above, but only set debug params from the given config file. */
-  void ReadDebugConfigFile(const char *filename);
-
-  /**
-   * Set the current page segmentation mode. Defaults to PSM_SINGLE_BLOCK.
-   * The mode is stored as an IntParam so it can also be modified by
-   * ReadConfigFile or SetVariable("tessedit_pageseg_mode", mode as string).
-   */
-  void SetPageSegMode(PageSegMode mode);
-
-  /** Return the current page segmentation mode. */
-  PageSegMode GetPageSegMode() const;
-
-  /**
-   * Recognize a rectangle from an image and return the result as a string.
-   * May be called many times for a single Init.
-   * Currently has no error checking.
-   * Greyscale of 8 and color of 24 or 32 bits per pixel may be given.
-   * Palette color images will not work properly and must be converted to
-   * 24 bit.
-   * Binary images of 1 bit per pixel may also be given but they must be
-   * byte packed with the MSB of the first byte being the first pixel, and a
-   * 1 represents WHITE. For binary images set bytes_per_pixel=0.
-   * The recognized text is returned as a char* which is coded
-   * as UTF8 and must be freed with the delete [] operator.
-   *
-   * Note that TesseractRect is the simplified convenience interface.
-   * For advanced uses, use SetImage, (optionally) SetRectangle, Recognize,
-   * and one or more of the Get*Text functions below.
-   */
-  char *TesseractRect(const unsigned char *imagedata, int bytes_per_pixel,
-                      int bytes_per_line, int left, int top, int width,
-                      int height);
-
-  /**
-   * Call between pages or documents etc to free up memory and forget
-   * adaptive data.
-   */
-  void ClearAdaptiveClassifier();
-
-  /**
-   * @defgroup AdvancedAPI Advanced API
-   * The following methods break TesseractRect into pieces, so you can
-   * get hold of the thresholded image, get the text in different formats,
-   * get bounding boxes, confidences etc.
-   */
-  /* @{ */
-
-  /**
-   * Provide an image for Tesseract to recognize. Format is as
-   * TesseractRect above. Copies the image buffer and converts to Pix.
-   * SetImage clears all recognition results, and sets the rectangle to the
-   * full image, so it may be followed immediately by a GetUTF8Text, and it
-   * will automatically perform recognition.
-   */
-  void SetImage(const unsigned char *imagedata, int width, int height,
-                int bytes_per_pixel, int bytes_per_line);
-
-  /**
-   * Provide an image for Tesseract to recognize. As with SetImage above,
-   * Tesseract takes its own copy of the image, so it need not persist until
-   * after Recognize.
-   * Pix vs raw, which to use?
-   * Use Pix where possible. Tesseract uses Pix as its internal representation
-   * and it is therefore more efficient to provide a Pix directly.
-   */
-  void SetImage(Pix *pix);
-
-  /**
-   * Set the resolution of the source image in pixels per inch so font size
-   * information can be calculated in results.  Call this after SetImage().
-   */
-  void SetSourceResolution(int ppi);
-
-  /**
-   * Restrict recognition to a sub-rectangle of the image. Call after SetImage.
-   * Each SetRectangle clears the recognition results so multiple rectangles
-   * can be recognized with the same image.
-   */
-  void SetRectangle(int left, int top, int width, int height);
-
-  /**
-   * Get a copy of the internal thresholded image from Tesseract.
-   * Caller takes ownership of the Pix and must pixDestroy it.
-   * May be called any time after SetImage, or after TesseractRect.
-   */
-  Pix *GetThresholdedImage();
-
-  /**
-   * Get the result of page layout analysis as a leptonica-style
-   * Boxa, Pixa pair, in reading order.
-   * Can be called before or after Recognize.
-   */
-  Boxa *GetRegions(Pixa **pixa);
-
-  /**
-   * Get the textlines as a leptonica-style
-   * Boxa, Pixa pair, in reading order.
-   * Can be called before or after Recognize.
-   * If raw_image is true, then extract from the original image instead of the
-   * thresholded image and pad by raw_padding pixels.
-   * If blockids is not nullptr, the block-id of each line is also returned as
-   * an array of one element per line. delete [] after use. If paraids is not
-   * nullptr, the paragraph-id of each line within its block is also returned as
-   * an array of one element per line. delete [] after use.
-   */
-  Boxa *GetTextlines(bool raw_image, int raw_padding, Pixa **pixa,
-                     int **blockids, int **paraids);
-  /*
-   Helper method to extract from the thresholded image. (most common usage)
-*/
-  Boxa *GetTextlines(Pixa **pixa, int **blockids) {
-    return GetTextlines(false, 0, pixa, blockids, nullptr);
-  }
-
-  /**
-   * Get textlines and strips of image regions as a leptonica-style Boxa, Pixa
-   * pair, in reading order. Enables downstream handling of non-rectangular
-   * regions.
-   * Can be called before or after Recognize.
-   * If blockids is not nullptr, the block-id of each line is also returned as
-   * an array of one element per line. delete [] after use.
-   */
-  Boxa *GetStrips(Pixa **pixa, int **blockids);
-
-  /**
-   * Get the words as a leptonica-style
-   * Boxa, Pixa pair, in reading order.
-   * Can be called before or after Recognize.
-   */
-  Boxa *GetWords(Pixa **pixa);
-
-  /**
-   * Gets the individual connected (text) components (created
-   * after pages segmentation step, but before recognition)
-   * as a leptonica-style Boxa, Pixa pair, in reading order.
-   * Can be called before or after Recognize.
-   * Note: the caller is responsible for calling boxaDestroy()
-   * on the returned Boxa array and pixaDestroy() on cc array.
-   */
-  Boxa *GetConnectedComponents(Pixa **cc);
-
-  /**
-   * Get the given level kind of components (block, textline, word etc.) as a
-   * leptonica-style Boxa, Pixa pair, in reading order.
-   * Can be called before or after Recognize.
-   * If blockids is not nullptr, the block-id of each component is also returned
-   * as an array of one element per component. delete [] after use.
-   * If paraids is not nullptr, the paragraph-id of each component within its
-   * block is also returned as an array of one element per component. delete []
-   * after use. If raw_image is true, then portions of the original image are
-   * extracted instead of the thresholded image and padded with raw_padding. If
-   * text_only is true, then only text components are returned.
-   */
-  Boxa *GetComponentImages(PageIteratorLevel level, bool text_only,
-                           bool raw_image, int raw_padding, Pixa **pixa,
-                           int **blockids, int **paraids);
-  // Helper function to get binary images with no padding (most common usage).
-  Boxa *GetComponentImages(const PageIteratorLevel level, const bool text_only,
-                           Pixa **pixa, int **blockids) {
-    return GetComponentImages(level, text_only, false, 0, pixa, blockids,
-                              nullptr);
-  }
-
-  /**
-   * Returns the scale factor of the thresholded image that would be returned by
-   * GetThresholdedImage() and the various GetX() methods that call
-   * GetComponentImages().
-   * Returns 0 if no thresholder has been set.
-   */
-  int GetThresholdedImageScaleFactor() const;
-
-  /**
-   * Runs page layout analysis in the mode set by SetPageSegMode.
-   * May optionally be called prior to Recognize to get access to just
-   * the page layout results. Returns an iterator to the results.
-   * If merge_similar_words is true, words are combined where suitable for use
-   * with a line recognizer. Use if you want to use AnalyseLayout to find the
-   * textlines, and then want to process textline fragments with an external
-   * line recognizer.
-   * Returns nullptr on error or an empty page.
-   * The returned iterator must be deleted after use.
-   * WARNING! This class points to data held within the TessBaseAPI class, and
-   * therefore can only be used while the TessBaseAPI class still exists and
-   * has not been subjected to a call of Init, SetImage, Recognize, Clear, End
-   * DetectOS, or anything else that changes the internal PAGE_RES.
-   */
-  PageIterator *AnalyseLayout();
-  PageIterator *AnalyseLayout(bool merge_similar_words);
-
-  /**
-   * Recognize the image from SetAndThresholdImage, generating Tesseract
-   * internal structures. Returns 0 on success.
-   * Optional. The Get*Text functions below will call Recognize if needed.
-   * After Recognize, the output is kept internally until the next SetImage.
-   */
-  int Recognize(ETEXT_DESC *monitor);
-
-  /**
-   * Methods to retrieve information after SetAndThresholdImage(),
-   * Recognize() or TesseractRect(). (Recognize is called implicitly if needed.)
-   */
-
-  /**
-   * Turns images into symbolic text.
-   *
-   * filename can point to a single image, a multi-page TIFF,
-   * or a plain text list of image filenames.
-   *
-   * retry_config is useful for debugging. If not nullptr, you can fall
-   * back to an alternate configuration if a page fails for some
-   * reason.
-   *
-   * timeout_millisec terminates processing if any single page
-   * takes too long. Set to 0 for unlimited time.
-   *
-   * renderer is responsible for creating the output. For example,
-   * use the TessTextRenderer if you want plaintext output, or
-   * the TessPDFRenderer to produce searchable PDF.
-   *
-   * If tessedit_page_number is non-negative, will only process that
-   * single page. Works for multi-page tiff file, or filelist.
-   *
-   * Returns true if successful, false on error.
-   */
-  bool ProcessPages(const char *filename, const char *retry_config,
-                    int timeout_millisec, TessResultRenderer *renderer);
-  // Does the real work of ProcessPages.
-  bool ProcessPagesInternal(const char *filename, const char *retry_config,
-                            int timeout_millisec, TessResultRenderer *renderer);
-
-  /**
-   * Turn a single image into symbolic text.
-   *
-   * The pix is the image processed. filename and page_index are
-   * metadata used by side-effect processes, such as reading a box
-   * file or formatting as hOCR.
-   *
-   * See ProcessPages for descriptions of other parameters.
-   */
-  bool ProcessPage(Pix *pix, int page_index, const char *filename,
-                   const char *retry_config, int timeout_millisec,
-                   TessResultRenderer *renderer);
-
-  /**
-   * Get a reading-order iterator to the results of LayoutAnalysis and/or
-   * Recognize. The returned iterator must be deleted after use.
-   * WARNING! This class points to data held within the TessBaseAPI class, and
-   * therefore can only be used while the TessBaseAPI class still exists and
-   * has not been subjected to a call of Init, SetImage, Recognize, Clear, End
-   * DetectOS, or anything else that changes the internal PAGE_RES.
-   */
-  ResultIterator *GetIterator();
-
-  /**
-   * Get a mutable iterator to the results of LayoutAnalysis and/or Recognize.
-   * The returned iterator must be deleted after use.
-   * WARNING! This class points to data held within the TessBaseAPI class, and
-   * therefore can only be used while the TessBaseAPI class still exists and
-   * has not been subjected to a call of Init, SetImage, Recognize, Clear, End
-   * DetectOS, or anything else that changes the internal PAGE_RES.
-   */
-  MutableIterator *GetMutableIterator();
-
-  /**
-   * The recognized text is returned as a char* which is coded
-   * as UTF8 and must be freed with the delete [] operator.
-   */
-  char *GetUTF8Text();
-
-  /**
-   * Make a HTML-formatted string with hOCR markup from the internal
-   * data structures.
-   * page_number is 0-based but will appear in the output as 1-based.
-   * monitor can be used to
-   *  cancel the recognition
-   *  receive progress callbacks
-   * Returned string must be freed with the delete [] operator.
-   */
-  char *GetHOCRText(ETEXT_DESC *monitor, int page_number);
-
-  /**
-   * Make a HTML-formatted string with hOCR markup from the internal
-   * data structures.
-   * page_number is 0-based but will appear in the output as 1-based.
-   * Returned string must be freed with the delete [] operator.
-   */
-  char *GetHOCRText(int page_number);
-
-  /**
-   * Make an XML-formatted string with Alto markup from the internal
-   * data structures.
-   */
-  char *GetAltoText(ETEXT_DESC *monitor, int page_number);
-
-  /**
-   * Make an XML-formatted string with Alto markup from the internal
-   * data structures.
-   */
-  char *GetAltoText(int page_number);
-
-  /**
-   * Make a TSV-formatted string from the internal data structures.
-   * page_number is 0-based but will appear in the output as 1-based.
-   * Returned string must be freed with the delete [] operator.
-   */
-  char *GetTSVText(int page_number);
-
-  /**
-   * Make a box file for LSTM training from the internal data structures.
-   * Constructs coordinates in the original image - not just the rectangle.
-   * page_number is a 0-based page index that will appear in the box file.
-   * Returned string must be freed with the delete [] operator.
-   */
-  char *GetLSTMBoxText(int page_number);
-
-  /**
-   * The recognized text is returned as a char* which is coded in the same
-   * format as a box file used in training.
-   * Constructs coordinates in the original image - not just the rectangle.
-   * page_number is a 0-based page index that will appear in the box file.
-   * Returned string must be freed with the delete [] operator.
-   */
-  char *GetBoxText(int page_number);
-
-  /**
-   * The recognized text is returned as a char* which is coded in the same
-   * format as a WordStr box file used in training.
-   * page_number is a 0-based page index that will appear in the box file.
-   * Returned string must be freed with the delete [] operator.
-   */
-  char *GetWordStrBoxText(int page_number);
-
-  /**
-   * The recognized text is returned as a char* which is coded
-   * as UNLV format Latin-1 with specific reject and suspect codes.
-   * Returned string must be freed with the delete [] operator.
-   */
-  char *GetUNLVText();
-
-  /**
-   * Detect the orientation of the input image and apparent script (alphabet).
-   * orient_deg is the detected clockwise rotation of the input image in degrees
-   * (0, 90, 180, 270)
-   * orient_conf is the confidence (15.0 is reasonably confident)
-   * script_name is an ASCII string, the name of the script, e.g. "Latin"
-   * script_conf is confidence level in the script
-   * Returns true on success and writes values to each parameter as an output
-   */
-  bool DetectOrientationScript(int *orient_deg, float *orient_conf,
-                               const char **script_name, float *script_conf);
-
-  /**
-   * The recognized text is returned as a char* which is coded
-   * as UTF8 and must be freed with the delete [] operator.
-   * page_number is a 0-based page index that will appear in the osd file.
-   */
-  char *GetOsdText(int page_number);
-
-  /** Returns the (average) confidence value between 0 and 100. */
-  int MeanTextConf();
-  /**
-   * Returns all word confidences (between 0 and 100) in an array, terminated
-   * by -1.  The calling function must delete [] after use.
-   * The number of confidences should correspond to the number of space-
-   * delimited words in GetUTF8Text.
-   */
-  int *AllWordConfidences();
-
-#ifndef DISABLED_LEGACY_ENGINE
-  /**
-   * Applies the given word to the adaptive classifier if possible.
-   * The word must be SPACE-DELIMITED UTF-8 - l i k e t h i s , so it can
-   * tell the boundaries of the graphemes.
-   * Assumes that SetImage/SetRectangle have been used to set the image
-   * to the given word. The mode arg should be PSM_SINGLE_WORD or
-   * PSM_CIRCLE_WORD, as that will be used to control layout analysis.
-   * The currently set PageSegMode is preserved.
-   * Returns false if adaption was not possible for some reason.
-   */
-  bool AdaptToWordStr(PageSegMode mode, const char *wordstr);
-#endif //  ndef DISABLED_LEGACY_ENGINE
-
-  /**
-   * Free up recognition results and any stored image data, without actually
-   * freeing any recognition data that would be time-consuming to reload.
-   * Afterwards, you must call SetImage or TesseractRect before doing
-   * any Recognize or Get* operation.
-   */
-  void Clear();
-
-  /**
-   * Close down tesseract and free up all memory. End() is equivalent to
-   * destructing and reconstructing your TessBaseAPI.
-   * Once End() has been used, none of the other API functions may be used
-   * other than Init and anything declared above it in the class definition.
-   */
-  void End();
-
-  /**
-   * Clear any library-level memory caches.
-   * There are a variety of expensive-to-load constant data structures (mostly
-   * language dictionaries) that are cached globally -- surviving the Init()
-   * and End() of individual TessBaseAPI's.  This function allows the clearing
-   * of these caches.
-   **/
-  static void ClearPersistentCache();
-
-  /**
-   * Check whether a word is valid according to Tesseract's language model
-   * @return 0 if the word is invalid, non-zero if valid.
-   * @warning temporary! This function will be removed from here and placed
-   * in a separate API at some future time.
-   */
-  int IsValidWord(const char *word) const;
-  // Returns true if utf8_character is defined in the UniCharset.
-  bool IsValidCharacter(const char *utf8_character) const;
-
-  bool GetTextDirection(int *out_offset, float *out_slope);
-
-  /** Sets Dict::letter_is_okay_ function to point to the given function. */
-  void SetDictFunc(DictFunc f);
-
-  /** Sets Dict::probability_in_context_ function to point to the given
-   * function.
-   */
-  void SetProbabilityInContextFunc(ProbabilityInContextFunc f);
-
-  /**
-   * Estimates the Orientation And Script of the image.
-   * @return true if the image was processed successfully.
-   */
-  bool DetectOS(OSResults *);
-
-  /**
-   * Return text orientation of each block as determined by an earlier run
-   * of layout analysis.
-   */
-  void GetBlockTextOrientations(int **block_orientation,
-                                bool **vertical_writing);
-
-  /** This method returns the string form of the specified unichar. */
-  const char *GetUnichar(int unichar_id) const;
-
-  /** Return the pointer to the i-th dawg loaded into tesseract_ object. */
-  const Dawg *GetDawg(int i) const;
-
-  /** Return the number of dawgs loaded into tesseract_ object. */
-  int NumDawgs() const;
-
-  Tesseract *tesseract() const {
-    return tesseract_;
-  }
-
-  OcrEngineMode oem() const {
-    return last_oem_requested_;
-  }
-
-  void set_min_orientation_margin(double margin);
-  /* @} */
-
-protected:
-  /** Common code for setting the image. Returns true if Init has been called.
-   */
-  bool InternalSetImage();
-
-  /**
-   * Run the thresholder to make the thresholded image. If pix is not nullptr,
-   * the source is thresholded to pix instead of the internal IMAGE.
-   */
-  virtual bool Threshold(Pix **pix);
-
-  /**
-   * Find lines from the image making the BLOCK_LIST.
-   * @return 0 on success.
-   */
-  int FindLines();
-
-  /** Delete the pageres and block list ready for a new page. */
-  void ClearResults();
-
-  /**
-   * Return an LTR Result Iterator -- used only for training, as we really want
-   * to ignore all BiDi smarts at that point.
-   * delete once you're done with it.
-   */
-  LTRResultIterator *GetLTRIterator();
-
-  /**
-   * Return the length of the output text string, as UTF8, assuming
-   * one newline per line and one per block, with a terminator,
-   * and assuming a single character reject marker for each rejected character.
-   * Also return the number of recognized blobs in blob_count.
-   */
-  int TextLength(int *blob_count) const;
-
-  //// paragraphs.cpp ////////////////////////////////////////////////////
-  void DetectParagraphs(bool after_text_recognition);
-
-  const PAGE_RES *GetPageRes() const {
-    return page_res_;
-  }
-
-protected:
-  Tesseract *tesseract_;          ///< The underlying data object.
-  Tesseract *osd_tesseract_;      ///< For orientation & script detection.
-  EquationDetect *equ_detect_;    ///< The equation detector.
-  FileReader reader_;             ///< Reads files from any filesystem.
-  ImageThresholder *thresholder_; ///< Image thresholding module.
-  std::vector<ParagraphModel *> *paragraph_models_;
-  BLOCK_LIST *block_list_;           ///< The page layout.
-  PAGE_RES *page_res_;               ///< The page-level data.
-  std::string input_file_;           ///< Name used by training code.
-  std::string output_file_;          ///< Name used by debug code.
-  std::string datapath_;             ///< Current location of tessdata.
-  std::string language_;             ///< Last initialized language.
-  OcrEngineMode last_oem_requested_; ///< Last ocr language mode requested.
-  bool recognition_done_;            ///< page_res_ contains recognition data.
-
-  /**
-   * @defgroup ThresholderParams Thresholder Parameters
-   * Parameters saved from the Thresholder. Needed to rebuild coordinates.
-   */
-  /* @{ */
-  int rect_left_;
-  int rect_top_;
-  int rect_width_;
-  int rect_height_;
-  int image_width_;
-  int image_height_;
-  /* @} */
-
-private:
-  // A list of image filenames gets special consideration
-  bool ProcessPagesFileList(FILE *fp, std::string *buf,
-                            const char *retry_config, int timeout_millisec,
-                            TessResultRenderer *renderer,
-                            int tessedit_page_number);
-  // TIFF supports multipage so gets special consideration.
-  bool ProcessPagesMultipageTiff(const unsigned char *data, size_t size,
-                                 const char *filename, const char *retry_config,
-                                 int timeout_millisec,
-                                 TessResultRenderer *renderer,
-                                 int tessedit_page_number);
-}; // class TessBaseAPI.
-
-/** Escape a char string - remove &<>"' with HTML codes. */
-std::string HOcrEscape(const char *text);
-
-} // namespace tesseract
-
-#endif // TESSERACT_API_BASEAPI_H_
--- a/third_party/ocr/tesseract-ocr/kylin/aarch64/include/tesseract/capi.h
+++ b/third_party/ocr/tesseract-ocr/kylin/aarch64/include/tesseract/capi.h
@ -1,484 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// File:        capi.h
-// Description: C-API TessBaseAPI
-//
-// (C) Copyright 2012, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef API_CAPI_H_
-#define API_CAPI_H_
-
-#include "export.h"
-
-#ifdef __cplusplus
-#  include <tesseract/baseapi.h>
-#  include <tesseract/ocrclass.h>
-#  include <tesseract/pageiterator.h>
-#  include <tesseract/renderer.h>
-#  include <tesseract/resultiterator.h>
-#endif
-
-#include <stdbool.h>
-#include <stdio.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#ifndef BOOL
-#  define BOOL int
-#  define TRUE 1
-#  define FALSE 0
-#endif
-
-#ifdef __cplusplus
-typedef tesseract::TessResultRenderer TessResultRenderer;
-typedef tesseract::TessBaseAPI TessBaseAPI;
-typedef tesseract::PageIterator TessPageIterator;
-typedef tesseract::ResultIterator TessResultIterator;
-typedef tesseract::MutableIterator TessMutableIterator;
-typedef tesseract::ChoiceIterator TessChoiceIterator;
-typedef tesseract::OcrEngineMode TessOcrEngineMode;
-typedef tesseract::PageSegMode TessPageSegMode;
-typedef tesseract::PageIteratorLevel TessPageIteratorLevel;
-typedef tesseract::Orientation TessOrientation;
-typedef tesseract::ParagraphJustification TessParagraphJustification;
-typedef tesseract::WritingDirection TessWritingDirection;
-typedef tesseract::TextlineOrder TessTextlineOrder;
-typedef tesseract::PolyBlockType TessPolyBlockType;
-typedef tesseract::ETEXT_DESC ETEXT_DESC;
-#else
-typedef struct TessResultRenderer TessResultRenderer;
-typedef struct TessBaseAPI TessBaseAPI;
-typedef struct TessPageIterator TessPageIterator;
-typedef struct TessResultIterator TessResultIterator;
-typedef struct TessMutableIterator TessMutableIterator;
-typedef struct TessChoiceIterator TessChoiceIterator;
-typedef enum TessOcrEngineMode {
-  OEM_TESSERACT_ONLY,
-  OEM_LSTM_ONLY,
-  OEM_TESSERACT_LSTM_COMBINED,
-  OEM_DEFAULT
-} TessOcrEngineMode;
-typedef enum TessPageSegMode {
-  PSM_OSD_ONLY,
-  PSM_AUTO_OSD,
-  PSM_AUTO_ONLY,
-  PSM_AUTO,
-  PSM_SINGLE_COLUMN,
-  PSM_SINGLE_BLOCK_VERT_TEXT,
-  PSM_SINGLE_BLOCK,
-  PSM_SINGLE_LINE,
-  PSM_SINGLE_WORD,
-  PSM_CIRCLE_WORD,
-  PSM_SINGLE_CHAR,
-  PSM_SPARSE_TEXT,
-  PSM_SPARSE_TEXT_OSD,
-  PSM_RAW_LINE,
-  PSM_COUNT
-} TessPageSegMode;
-typedef enum TessPageIteratorLevel {
-  RIL_BLOCK,
-  RIL_PARA,
-  RIL_TEXTLINE,
-  RIL_WORD,
-  RIL_SYMBOL
-} TessPageIteratorLevel;
-typedef enum TessPolyBlockType {
-  PT_UNKNOWN,
-  PT_FLOWING_TEXT,
-  PT_HEADING_TEXT,
-  PT_PULLOUT_TEXT,
-  PT_EQUATION,
-  PT_INLINE_EQUATION,
-  PT_TABLE,
-  PT_VERTICAL_TEXT,
-  PT_CAPTION_TEXT,
-  PT_FLOWING_IMAGE,
-  PT_HEADING_IMAGE,
-  PT_PULLOUT_IMAGE,
-  PT_HORZ_LINE,
-  PT_VERT_LINE,
-  PT_NOISE,
-  PT_COUNT
-} TessPolyBlockType;
-typedef enum TessOrientation {
-  ORIENTATION_PAGE_UP,
-  ORIENTATION_PAGE_RIGHT,
-  ORIENTATION_PAGE_DOWN,
-  ORIENTATION_PAGE_LEFT
-} TessOrientation;
-typedef enum TessParagraphJustification {
-  JUSTIFICATION_UNKNOWN,
-  JUSTIFICATION_LEFT,
-  JUSTIFICATION_CENTER,
-  JUSTIFICATION_RIGHT
-} TessParagraphJustification;
-typedef enum TessWritingDirection {
-  WRITING_DIRECTION_LEFT_TO_RIGHT,
-  WRITING_DIRECTION_RIGHT_TO_LEFT,
-  WRITING_DIRECTION_TOP_TO_BOTTOM
-} TessWritingDirection;
-typedef enum TessTextlineOrder {
-  TEXTLINE_ORDER_LEFT_TO_RIGHT,
-  TEXTLINE_ORDER_RIGHT_TO_LEFT,
-  TEXTLINE_ORDER_TOP_TO_BOTTOM
-} TessTextlineOrder;
-typedef struct ETEXT_DESC ETEXT_DESC;
-#endif
-
-typedef bool (*TessCancelFunc)(void *cancel_this, int words);
-typedef bool (*TessProgressFunc)(ETEXT_DESC *ths, int left, int right, int top,
-                                 int bottom);
-
-struct Pix;
-struct Boxa;
-struct Pixa;
-
-/* General free functions */
-
-TESS_API const char *TessVersion();
-TESS_API void TessDeleteText(const char *text);
-TESS_API void TessDeleteTextArray(char **arr);
-TESS_API void TessDeleteIntArray(const int *arr);
-
-/* Renderer API */
-TESS_API TessResultRenderer *TessTextRendererCreate(const char *outputbase);
-TESS_API TessResultRenderer *TessHOcrRendererCreate(const char *outputbase);
-TESS_API TessResultRenderer *TessHOcrRendererCreate2(const char *outputbase,
-                                                     BOOL font_info);
-TESS_API TessResultRenderer *TessAltoRendererCreate(const char *outputbase);
-TESS_API TessResultRenderer *TessTsvRendererCreate(const char *outputbase);
-TESS_API TessResultRenderer *TessPDFRendererCreate(const char *outputbase,
-                                                   const char *datadir,
-                                                   BOOL textonly);
-TESS_API TessResultRenderer *TessUnlvRendererCreate(const char *outputbase);
-TESS_API TessResultRenderer *TessBoxTextRendererCreate(const char *outputbase);
-TESS_API TessResultRenderer *TessLSTMBoxRendererCreate(const char *outputbase);
-TESS_API TessResultRenderer *TessWordStrBoxRendererCreate(
-    const char *outputbase);
-
-TESS_API void TessDeleteResultRenderer(TessResultRenderer *renderer);
-TESS_API void TessResultRendererInsert(TessResultRenderer *renderer,
-                                       TessResultRenderer *next);
-TESS_API TessResultRenderer *TessResultRendererNext(
-    TessResultRenderer *renderer);
-TESS_API BOOL TessResultRendererBeginDocument(TessResultRenderer *renderer,
-                                              const char *title);
-TESS_API BOOL TessResultRendererAddImage(TessResultRenderer *renderer,
-                                         TessBaseAPI *api);
-TESS_API BOOL TessResultRendererEndDocument(TessResultRenderer *renderer);
-
-TESS_API const char *TessResultRendererExtention(TessResultRenderer *renderer);
-TESS_API const char *TessResultRendererTitle(TessResultRenderer *renderer);
-TESS_API int TessResultRendererImageNum(TessResultRenderer *renderer);
-
-/* Base API */
-
-TESS_API TessBaseAPI *TessBaseAPICreate();
-TESS_API void TessBaseAPIDelete(TessBaseAPI *handle);
-
-TESS_API size_t TessBaseAPIGetOpenCLDevice(TessBaseAPI *handle, void **device);
-
-TESS_API void TessBaseAPISetInputName(TessBaseAPI *handle, const char *name);
-TESS_API const char *TessBaseAPIGetInputName(TessBaseAPI *handle);
-
-TESS_API void TessBaseAPISetInputImage(TessBaseAPI *handle, struct Pix *pix);
-TESS_API struct Pix *TessBaseAPIGetInputImage(TessBaseAPI *handle);
-
-TESS_API int TessBaseAPIGetSourceYResolution(TessBaseAPI *handle);
-TESS_API const char *TessBaseAPIGetDatapath(TessBaseAPI *handle);
-
-TESS_API void TessBaseAPISetOutputName(TessBaseAPI *handle, const char *name);
-
-TESS_API BOOL TessBaseAPISetVariable(TessBaseAPI *handle, const char *name,
-                                     const char *value);
-TESS_API BOOL TessBaseAPISetDebugVariable(TessBaseAPI *handle, const char *name,
-                                          const char *value);
-
-TESS_API BOOL TessBaseAPIGetIntVariable(const TessBaseAPI *handle,
-                                        const char *name, int *value);
-TESS_API BOOL TessBaseAPIGetBoolVariable(const TessBaseAPI *handle,
-                                         const char *name, BOOL *value);
-TESS_API BOOL TessBaseAPIGetDoubleVariable(const TessBaseAPI *handle,
-                                           const char *name, double *value);
-TESS_API const char *TessBaseAPIGetStringVariable(const TessBaseAPI *handle,
-                                                  const char *name);
-
-TESS_API void TessBaseAPIPrintVariables(const TessBaseAPI *handle, FILE *fp);
-TESS_API BOOL TessBaseAPIPrintVariablesToFile(const TessBaseAPI *handle,
-                                              const char *filename);
-
-TESS_API int TessBaseAPIInit1(TessBaseAPI *handle, const char *datapath,
-                              const char *language, TessOcrEngineMode oem,
-                              char **configs, int configs_size);
-TESS_API int TessBaseAPIInit2(TessBaseAPI *handle, const char *datapath,
-                              const char *language, TessOcrEngineMode oem);
-TESS_API int TessBaseAPIInit3(TessBaseAPI *handle, const char *datapath,
-                              const char *language);
-
-TESS_API int TessBaseAPIInit4(TessBaseAPI *handle, const char *datapath,
-                              const char *language, TessOcrEngineMode mode,
-                              char **configs, int configs_size, char **vars_vec,
-                              char **vars_values, size_t vars_vec_size,
-                              BOOL set_only_non_debug_params);
-
-TESS_API int TessBaseAPIInit5(TessBaseAPI *handle, const char *data, int data_size,
-                              const char *language, TessOcrEngineMode mode,
-                              char **configs, int configs_size, char **vars_vec,
-                              char **vars_values, size_t vars_vec_size,
-                              BOOL set_only_non_debug_params);
-
-TESS_API const char *TessBaseAPIGetInitLanguagesAsString(
-    const TessBaseAPI *handle);
-TESS_API char **TessBaseAPIGetLoadedLanguagesAsVector(
-    const TessBaseAPI *handle);
-TESS_API char **TessBaseAPIGetAvailableLanguagesAsVector(
-    const TessBaseAPI *handle);
-
-TESS_API void TessBaseAPIInitForAnalysePage(TessBaseAPI *handle);
-
-TESS_API void TessBaseAPIReadConfigFile(TessBaseAPI *handle,
-                                        const char *filename);
-TESS_API void TessBaseAPIReadDebugConfigFile(TessBaseAPI *handle,
-                                             const char *filename);
-
-TESS_API void TessBaseAPISetPageSegMode(TessBaseAPI *handle,
-                                        TessPageSegMode mode);
-TESS_API TessPageSegMode TessBaseAPIGetPageSegMode(const TessBaseAPI *handle);
-
-TESS_API char *TessBaseAPIRect(TessBaseAPI *handle,
-                               const unsigned char *imagedata,
-                               int bytes_per_pixel, int bytes_per_line,
-                               int left, int top, int width, int height);
-
-TESS_API void TessBaseAPIClearAdaptiveClassifier(TessBaseAPI *handle);
-
-TESS_API void TessBaseAPISetImage(TessBaseAPI *handle,
-                                  const unsigned char *imagedata, int width,
-                                  int height, int bytes_per_pixel,
-                                  int bytes_per_line);
-TESS_API void TessBaseAPISetImage2(TessBaseAPI *handle, struct Pix *pix);
-
-TESS_API void TessBaseAPISetSourceResolution(TessBaseAPI *handle, int ppi);
-
-TESS_API void TessBaseAPISetRectangle(TessBaseAPI *handle, int left, int top,
-                                      int width, int height);
-
-TESS_API struct Pix *TessBaseAPIGetThresholdedImage(TessBaseAPI *handle);
-TESS_API struct Boxa *TessBaseAPIGetRegions(TessBaseAPI *handle,
-                                            struct Pixa **pixa);
-TESS_API struct Boxa *TessBaseAPIGetTextlines(TessBaseAPI *handle,
-                                              struct Pixa **pixa,
-                                              int **blockids);
-TESS_API struct Boxa *TessBaseAPIGetTextlines1(TessBaseAPI *handle,
-                                               BOOL raw_image, int raw_padding,
-                                               struct Pixa **pixa,
-                                               int **blockids, int **paraids);
-TESS_API struct Boxa *TessBaseAPIGetStrips(TessBaseAPI *handle,
-                                           struct Pixa **pixa, int **blockids);
-TESS_API struct Boxa *TessBaseAPIGetWords(TessBaseAPI *handle,
-                                          struct Pixa **pixa);
-TESS_API struct Boxa *TessBaseAPIGetConnectedComponents(TessBaseAPI *handle,
-                                                        struct Pixa **cc);
-TESS_API struct Boxa *TessBaseAPIGetComponentImages(TessBaseAPI *handle,
-                                                    TessPageIteratorLevel level,
-                                                    BOOL text_only,
-                                                    struct Pixa **pixa,
-                                                    int **blockids);
-TESS_API struct Boxa *TessBaseAPIGetComponentImages1(
-    TessBaseAPI *handle, TessPageIteratorLevel level, BOOL text_only,
-    BOOL raw_image, int raw_padding, struct Pixa **pixa, int **blockids,
-    int **paraids);
-
-TESS_API int TessBaseAPIGetThresholdedImageScaleFactor(
-    const TessBaseAPI *handle);
-
-TESS_API TessPageIterator *TessBaseAPIAnalyseLayout(TessBaseAPI *handle);
-
-TESS_API int TessBaseAPIRecognize(TessBaseAPI *handle, ETEXT_DESC *monitor);
-
-TESS_API BOOL TessBaseAPIProcessPages(TessBaseAPI *handle, const char *filename,
-                                      const char *retry_config,
-                                      int timeout_millisec,
-                                      TessResultRenderer *renderer);
-TESS_API BOOL TessBaseAPIProcessPage(TessBaseAPI *handle, struct Pix *pix,
-                                     int page_index, const char *filename,
-                                     const char *retry_config,
-                                     int timeout_millisec,
-                                     TessResultRenderer *renderer);
-
-TESS_API TessResultIterator *TessBaseAPIGetIterator(TessBaseAPI *handle);
-TESS_API TessMutableIterator *TessBaseAPIGetMutableIterator(
-    TessBaseAPI *handle);
-
-TESS_API char *TessBaseAPIGetUTF8Text(TessBaseAPI *handle);
-TESS_API char *TessBaseAPIGetHOCRText(TessBaseAPI *handle, int page_number);
-
-TESS_API char *TessBaseAPIGetAltoText(TessBaseAPI *handle, int page_number);
-TESS_API char *TessBaseAPIGetTsvText(TessBaseAPI *handle, int page_number);
-
-TESS_API char *TessBaseAPIGetBoxText(TessBaseAPI *handle, int page_number);
-TESS_API char *TessBaseAPIGetLSTMBoxText(TessBaseAPI *handle, int page_number);
-TESS_API char *TessBaseAPIGetWordStrBoxText(TessBaseAPI *handle,
-                                            int page_number);
-
-TESS_API char *TessBaseAPIGetUNLVText(TessBaseAPI *handle);
-TESS_API int TessBaseAPIMeanTextConf(TessBaseAPI *handle);
-
-TESS_API int *TessBaseAPIAllWordConfidences(TessBaseAPI *handle);
-
-#ifndef DISABLED_LEGACY_ENGINE
-TESS_API BOOL TessBaseAPIAdaptToWordStr(TessBaseAPI *handle,
-                                        TessPageSegMode mode,
-                                        const char *wordstr);
-#endif // #ifndef DISABLED_LEGACY_ENGINE
-
-TESS_API void TessBaseAPIClear(TessBaseAPI *handle);
-TESS_API void TessBaseAPIEnd(TessBaseAPI *handle);
-
-TESS_API int TessBaseAPIIsValidWord(TessBaseAPI *handle, const char *word);
-TESS_API BOOL TessBaseAPIGetTextDirection(TessBaseAPI *handle, int *out_offset,
-                                          float *out_slope);
-
-TESS_API const char *TessBaseAPIGetUnichar(TessBaseAPI *handle, int unichar_id);
-
-TESS_API void TessBaseAPIClearPersistentCache(TessBaseAPI *handle);
-
-#ifndef DISABLED_LEGACY_ENGINE
-
-// Call TessDeleteText(*best_script_name) to free memory allocated by this
-// function
-TESS_API BOOL TessBaseAPIDetectOrientationScript(TessBaseAPI *handle,
-                                                 int *orient_deg,
-                                                 float *orient_conf,
-                                                 const char **script_name,
-                                                 float *script_conf);
-#endif // #ifndef DISABLED_LEGACY_ENGINE
-
-TESS_API void TessBaseAPISetMinOrientationMargin(TessBaseAPI *handle,
-                                                 double margin);
-
-TESS_API int TessBaseAPINumDawgs(const TessBaseAPI *handle);
-
-TESS_API TessOcrEngineMode TessBaseAPIOem(const TessBaseAPI *handle);
-
-TESS_API void TessBaseGetBlockTextOrientations(TessBaseAPI *handle,
-                                               int **block_orientation,
-                                               bool **vertical_writing);
-
-/* Page iterator */
-
-TESS_API void TessPageIteratorDelete(TessPageIterator *handle);
-
-TESS_API TessPageIterator *TessPageIteratorCopy(const TessPageIterator *handle);
-
-TESS_API void TessPageIteratorBegin(TessPageIterator *handle);
-
-TESS_API BOOL TessPageIteratorNext(TessPageIterator *handle,
-                                   TessPageIteratorLevel level);
-
-TESS_API BOOL TessPageIteratorIsAtBeginningOf(const TessPageIterator *handle,
-                                              TessPageIteratorLevel level);
-
-TESS_API BOOL TessPageIteratorIsAtFinalElement(const TessPageIterator *handle,
-                                               TessPageIteratorLevel level,
-                                               TessPageIteratorLevel element);
-
-TESS_API BOOL TessPageIteratorBoundingBox(const TessPageIterator *handle,
-                                          TessPageIteratorLevel level,
-                                          int *left, int *top, int *right,
-                                          int *bottom);
-
-TESS_API TessPolyBlockType
-TessPageIteratorBlockType(const TessPageIterator *handle);
-
-TESS_API struct Pix *TessPageIteratorGetBinaryImage(
-    const TessPageIterator *handle, TessPageIteratorLevel level);
-
-TESS_API struct Pix *TessPageIteratorGetImage(const TessPageIterator *handle,
-                                              TessPageIteratorLevel level,
-                                              int padding,
-                                              struct Pix *original_image,
-                                              int *left, int *top);
-
-TESS_API BOOL TessPageIteratorBaseline(const TessPageIterator *handle,
-                                       TessPageIteratorLevel level, int *x1,
-                                       int *y1, int *x2, int *y2);
-
-TESS_API void TessPageIteratorOrientation(
-    TessPageIterator *handle, TessOrientation *orientation,
-    TessWritingDirection *writing_direction, TessTextlineOrder *textline_order,
-    float *deskew_angle);
-
-TESS_API void TessPageIteratorParagraphInfo(
-    TessPageIterator *handle, TessParagraphJustification *justification,
-    BOOL *is_list_item, BOOL *is_crown, int *first_line_indent);
-
-/* Result iterator */
-
-TESS_API void TessResultIteratorDelete(TessResultIterator *handle);
-TESS_API TessResultIterator *TessResultIteratorCopy(
-    const TessResultIterator *handle);
-TESS_API TessPageIterator *TessResultIteratorGetPageIterator(
-    TessResultIterator *handle);
-TESS_API const TessPageIterator *TessResultIteratorGetPageIteratorConst(
-    const TessResultIterator *handle);
-TESS_API TessChoiceIterator *TessResultIteratorGetChoiceIterator(
-    const TessResultIterator *handle);
-
-TESS_API BOOL TessResultIteratorNext(TessResultIterator *handle,
-                                     TessPageIteratorLevel level);
-TESS_API char *TessResultIteratorGetUTF8Text(const TessResultIterator *handle,
-                                             TessPageIteratorLevel level);
-TESS_API float TessResultIteratorConfidence(const TessResultIterator *handle,
-                                            TessPageIteratorLevel level);
-TESS_API const char *TessResultIteratorWordRecognitionLanguage(
-    const TessResultIterator *handle);
-TESS_API const char *TessResultIteratorWordFontAttributes(
-    const TessResultIterator *handle, BOOL *is_bold, BOOL *is_italic,
-    BOOL *is_underlined, BOOL *is_monospace, BOOL *is_serif, BOOL *is_smallcaps,
-    int *pointsize, int *font_id);
-
-TESS_API BOOL
-TessResultIteratorWordIsFromDictionary(const TessResultIterator *handle);
-TESS_API BOOL TessResultIteratorWordIsNumeric(const TessResultIterator *handle);
-TESS_API BOOL
-TessResultIteratorSymbolIsSuperscript(const TessResultIterator *handle);
-TESS_API BOOL
-TessResultIteratorSymbolIsSubscript(const TessResultIterator *handle);
-TESS_API BOOL
-TessResultIteratorSymbolIsDropcap(const TessResultIterator *handle);
-
-TESS_API void TessChoiceIteratorDelete(TessChoiceIterator *handle);
-TESS_API BOOL TessChoiceIteratorNext(TessChoiceIterator *handle);
-TESS_API const char *TessChoiceIteratorGetUTF8Text(
-    const TessChoiceIterator *handle);
-TESS_API float TessChoiceIteratorConfidence(const TessChoiceIterator *handle);
-
-/* Progress monitor */
-
-TESS_API ETEXT_DESC *TessMonitorCreate();
-TESS_API void TessMonitorDelete(ETEXT_DESC *monitor);
-TESS_API void TessMonitorSetCancelFunc(ETEXT_DESC *monitor,
-                                       TessCancelFunc cancelFunc);
-TESS_API void TessMonitorSetCancelThis(ETEXT_DESC *monitor, void *cancelThis);
-TESS_API void *TessMonitorGetCancelThis(ETEXT_DESC *monitor);
-TESS_API void TessMonitorSetProgressFunc(ETEXT_DESC *monitor,
-                                         TessProgressFunc progressFunc);
-TESS_API int TessMonitorGetProgress(ETEXT_DESC *monitor);
-TESS_API void TessMonitorSetDeadlineMSecs(ETEXT_DESC *monitor, int deadline);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif // API_CAPI_H_
--- a/third_party/ocr/tesseract-ocr/kylin/aarch64/include/tesseract/export.h
+++ b/third_party/ocr/tesseract-ocr/kylin/aarch64/include/tesseract/export.h
@ -1,37 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// File:        export.h
-// Description: Place holder
-//
-// (C) Copyright 2006, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef TESSERACT_PLATFORM_H_
-#define TESSERACT_PLATFORM_H_
-
-#ifndef TESS_API
-#  if defined(_WIN32) || defined(__CYGWIN__)
-#    if defined(TESS_EXPORTS)
-#      define TESS_API __declspec(dllexport)
-#    elif defined(TESS_IMPORTS)
-#      define TESS_API __declspec(dllimport)
-#    else
-#      define TESS_API
-#    endif
-#  else
-#    if defined(TESS_EXPORTS) || defined(TESS_IMPORTS)
-#      define TESS_API __attribute__((visibility("default")))
-#    else
-#      define TESS_API
-#    endif
-#  endif
-#endif
-
-#endif // TESSERACT_PLATFORM_H_
--- a/third_party/ocr/tesseract-ocr/kylin/aarch64/include/tesseract/ltrresultiterator.h
+++ b/third_party/ocr/tesseract-ocr/kylin/aarch64/include/tesseract/ltrresultiterator.h
@ -1,235 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// File:        ltrresultiterator.h
-// Description: Iterator for tesseract results in strict left-to-right
-//              order that avoids using tesseract internal data structures.
-// Author:      Ray Smith
-//
-// (C) Copyright 2010, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H_
-#define TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H_
-
-#include "export.h"       // for TESS_API
-#include "pageiterator.h" // for PageIterator
-#include "publictypes.h"  // for PageIteratorLevel
-#include "unichar.h"      // for StrongScriptDirection
-
-namespace tesseract {
-
-class BLOB_CHOICE_IT;
-class PAGE_RES;
-class WERD_RES;
-
-class Tesseract;
-
-// Class to iterate over tesseract results, providing access to all levels
-// of the page hierarchy, without including any tesseract headers or having
-// to handle any tesseract structures.
-// WARNING! This class points to data held within the TessBaseAPI class, and
-// therefore can only be used while the TessBaseAPI class still exists and
-// has not been subjected to a call of Init, SetImage, Recognize, Clear, End
-// DetectOS, or anything else that changes the internal PAGE_RES.
-// See tesseract/publictypes.h for the definition of PageIteratorLevel.
-// See also base class PageIterator, which contains the bulk of the interface.
-// LTRResultIterator adds text-specific methods for access to OCR output.
-
-class TESS_API LTRResultIterator : public PageIterator {
-  friend class ChoiceIterator;
-
-public:
-  // page_res and tesseract come directly from the BaseAPI.
-  // The rectangle parameters are copied indirectly from the Thresholder,
-  // via the BaseAPI. They represent the coordinates of some rectangle in an
-  // original image (in top-left-origin coordinates) and therefore the top-left
-  // needs to be added to any output boxes in order to specify coordinates
-  // in the original image. See TessBaseAPI::SetRectangle.
-  // The scale and scaled_yres are in case the Thresholder scaled the image
-  // rectangle prior to thresholding. Any coordinates in tesseract's image
-  // must be divided by scale before adding (rect_left, rect_top).
-  // The scaled_yres indicates the effective resolution of the binary image
-  // that tesseract has been given by the Thresholder.
-  // After the constructor, Begin has already been called.
-  LTRResultIterator(PAGE_RES *page_res, Tesseract *tesseract, int scale,
-                    int scaled_yres, int rect_left, int rect_top,
-                    int rect_width, int rect_height);
-
-  ~LTRResultIterator() override;
-
-  // LTRResultIterators may be copied! This makes it possible to iterate over
-  // all the objects at a lower level, while maintaining an iterator to
-  // objects at a higher level. These constructors DO NOT CALL Begin, so
-  // iterations will continue from the location of src.
-  // TODO: For now the copy constructor and operator= only need the base class
-  // versions, but if new data members are added, don't forget to add them!
-
-  // ============= Moving around within the page ============.
-
-  // See PageIterator.
-
-  // ============= Accessing data ==============.
-
-  // Returns the null terminated UTF-8 encoded text string for the current
-  // object at the given level. Use delete [] to free after use.
-  char *GetUTF8Text(PageIteratorLevel level) const;
-
-  // Set the string inserted at the end of each text line. "\n" by default.
-  void SetLineSeparator(const char *new_line);
-
-  // Set the string inserted at the end of each paragraph. "\n" by default.
-  void SetParagraphSeparator(const char *new_para);
-
-  // Returns the mean confidence of the current object at the given level.
-  // The number should be interpreted as a percent probability. (0.0f-100.0f)
-  float Confidence(PageIteratorLevel level) const;
-
-  // ============= Functions that refer to words only ============.
-
-  // Returns the font attributes of the current word. If iterating at a higher
-  // level object than words, eg textlines, then this will return the
-  // attributes of the first word in that textline.
-  // The actual return value is a string representing a font name. It points
-  // to an internal table and SHOULD NOT BE DELETED. Lifespan is the same as
-  // the iterator itself, ie rendered invalid by various members of
-  // TessBaseAPI, including Init, SetImage, End or deleting the TessBaseAPI.
-  // Pointsize is returned in printers points (1/72 inch.)
-  const char *WordFontAttributes(bool *is_bold, bool *is_italic,
-                                 bool *is_underlined, bool *is_monospace,
-                                 bool *is_serif, bool *is_smallcaps,
-                                 int *pointsize, int *font_id) const;
-
-  // Return the name of the language used to recognize this word.
-  // On error, nullptr.  Do not delete this pointer.
-  const char *WordRecognitionLanguage() const;
-
-  // Return the overall directionality of this word.
-  StrongScriptDirection WordDirection() const;
-
-  // Returns true if the current word was found in a dictionary.
-  bool WordIsFromDictionary() const;
-
-  // Returns the number of blanks before the current word.
-  int BlanksBeforeWord() const;
-
-  // Returns true if the current word is numeric.
-  bool WordIsNumeric() const;
-
-  // Returns true if the word contains blamer information.
-  bool HasBlamerInfo() const;
-
-  // Returns the pointer to ParamsTrainingBundle stored in the BlamerBundle
-  // of the current word.
-  const void *GetParamsTrainingBundle() const;
-
-  // Returns a pointer to the string with blamer information for this word.
-  // Assumes that the word's blamer_bundle is not nullptr.
-  const char *GetBlamerDebug() const;
-
-  // Returns a pointer to the string with misadaption information for this word.
-  // Assumes that the word's blamer_bundle is not nullptr.
-  const char *GetBlamerMisadaptionDebug() const;
-
-  // Returns true if a truth string was recorded for the current word.
-  bool HasTruthString() const;
-
-  // Returns true if the given string is equivalent to the truth string for
-  // the current word.
-  bool EquivalentToTruth(const char *str) const;
-
-  // Returns a null terminated UTF-8 encoded truth string for the current word.
-  // Use delete [] to free after use.
-  char *WordTruthUTF8Text() const;
-
-  // Returns a null terminated UTF-8 encoded normalized OCR string for the
-  // current word. Use delete [] to free after use.
-  char *WordNormedUTF8Text() const;
-
-  // Returns a pointer to serialized choice lattice.
-  // Fills lattice_size with the number of bytes in lattice data.
-  const char *WordLattice(int *lattice_size) const;
-
-  // ============= Functions that refer to symbols only ============.
-
-  // Returns true if the current symbol is a superscript.
-  // If iterating at a higher level object than symbols, eg words, then
-  // this will return the attributes of the first symbol in that word.
-  bool SymbolIsSuperscript() const;
-  // Returns true if the current symbol is a subscript.
-  // If iterating at a higher level object than symbols, eg words, then
-  // this will return the attributes of the first symbol in that word.
-  bool SymbolIsSubscript() const;
-  // Returns true if the current symbol is a dropcap.
-  // If iterating at a higher level object than symbols, eg words, then
-  // this will return the attributes of the first symbol in that word.
-  bool SymbolIsDropcap() const;
-
-protected:
-  const char *line_separator_;
-  const char *paragraph_separator_;
-};
-
-// Class to iterate over the classifier choices for a single RIL_SYMBOL.
-class TESS_API ChoiceIterator {
-public:
-  // Construction is from a LTRResultIterator that points to the symbol of
-  // interest. The ChoiceIterator allows a one-shot iteration over the
-  // choices for this symbol and after that it is useless.
-  explicit ChoiceIterator(const LTRResultIterator &result_it);
-  ~ChoiceIterator();
-
-  // Moves to the next choice for the symbol and returns false if there
-  // are none left.
-  bool Next();
-
-  // ============= Accessing data ==============.
-
-  // Returns the null terminated UTF-8 encoded text string for the current
-  // choice.
-  // NOTE: Unlike LTRResultIterator::GetUTF8Text, the return points to an
-  // internal structure and should NOT be delete[]ed to free after use.
-  const char *GetUTF8Text() const;
-
-  // Returns the confidence of the current choice depending on the used language
-  // data. If only LSTM traineddata is used the value range is 0.0f - 1.0f. All
-  // choices for one symbol should roughly add up to 1.0f.
-  // If only traineddata of the legacy engine is used, the number should be
-  // interpreted as a percent probability. (0.0f-100.0f) In this case
-  // probabilities won't add up to 100. Each one stands on its own.
-  float Confidence() const;
-
-  // Returns a vector containing all timesteps, which belong to the currently
-  // selected symbol. A timestep is a vector containing pairs of symbols and
-  // floating point numbers. The number states the probability for the
-  // corresponding symbol.
-  std::vector<std::vector<std::pair<const char *, float>>> *Timesteps() const;
-
-private:
-  // clears the remaining spaces out of the results and adapt the probabilities
-  void filterSpaces();
-  // Pointer to the WERD_RES object owned by the API.
-  WERD_RES *word_res_;
-  // Iterator over the blob choices.
-  BLOB_CHOICE_IT *choice_it_;
-  std::vector<std::pair<const char *, float>> *LSTM_choices_ = nullptr;
-  std::vector<std::pair<const char *, float>>::iterator LSTM_choice_it_;
-
-  const int *tstep_index_;
-  // regulates the rating granularity
-  double rating_coefficient_;
-  // leading blanks
-  int blanks_before_word_;
-  // true when there is lstm engine related trained data
-  bool oemLSTM_;
-};
-
-} // namespace tesseract.
-
-#endif // TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H_
--- a/third_party/ocr/tesseract-ocr/kylin/aarch64/include/tesseract/ocrclass.h
+++ b/third_party/ocr/tesseract-ocr/kylin/aarch64/include/tesseract/ocrclass.h
@ -1,158 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-/**********************************************************************
- * File:        ocrclass.h
- * Description: Class definitions and constants for the OCR API.
- * Author:      Hewlett-Packard Co
- *
- * (C) Copyright 1996, Hewlett-Packard Co.
- ** Licensed under the Apache License, Version 2.0 (the "License");
- ** you may not use this file except in compliance with the License.
- ** You may obtain a copy of the License at
- ** http://www.apache.org/licenses/LICENSE-2.0
- ** Unless required by applicable law or agreed to in writing, software
- ** distributed under the License is distributed on an "AS IS" BASIS,
- ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- ** See the License for the specific language governing permissions and
- ** limitations under the License.
- *
- **********************************************************************/
-
-/**********************************************************************
- * This file contains typedefs for all the structures used by
- * the HP OCR interface.
- * The structures are designed to allow them to be used with any
- * structure alignment up to 8.
- **********************************************************************/
-
-#ifndef CCUTIL_OCRCLASS_H_
-#define CCUTIL_OCRCLASS_H_
-
-#include <chrono>
-#include <ctime>
-
-namespace tesseract {
-
-/**********************************************************************
- * EANYCODE_CHAR
- * Description of a single character. The character code is defined by
- * the character set of the current font.
- * Output text is sent as an array of these structures.
- * Spaces and line endings in the output are represented in the
- * structures of the surrounding characters. They are not directly
- * represented as characters.
- * The first character in a word has a positive value of blanks.
- * Missing information should be set to the defaults in the comments.
- * If word bounds are known, but not character bounds, then the top and
- * bottom of each character should be those of the word. The left of the
- * first and right of the last char in each word should be set. All other
- * lefts and rights should be set to -1.
- * If set, the values of right and bottom are left+width and top+height.
- * Most of the members come directly from the parameters to ocr_append_char.
- * The formatting member uses the enhancement parameter and combines the
- * line direction stuff into the top 3 bits.
- * The coding is 0=RL char, 1=LR char, 2=DR NL, 3=UL NL, 4=DR Para,
- * 5=UL Para, 6=TB char, 7=BT char. API users do not need to know what
- * the coding is, only that it is backwards compatible with the previous
- * version.
- **********************************************************************/
-
-struct EANYCODE_CHAR { /*single character */
-  // It should be noted that the format for char_code for version 2.0 and beyond
-  // is UTF8 which means that ASCII characters will come out as one structure
-  // but other characters will be returned in two or more instances of this
-  // structure with a single byte of the  UTF8 code in each, but each will have
-  // the same bounding box. Programs which want to handle languagues with
-  // different characters sets will need to handle extended characters
-  // appropriately, but *all* code needs to be prepared to receive UTF8 coded
-  // characters for characters such as bullet and fancy quotes.
-  uint16_t char_code; /*character itself */
-  int16_t left;       /*of char (-1) */
-  int16_t right;      /*of char (-1) */
-  int16_t top;        /*of char (-1) */
-  int16_t bottom;     /*of char (-1) */
-  int16_t font_index; /*what font (0) */
-  uint8_t confidence; /*0=perfect, 100=reject (0/100) */
-  uint8_t point_size; /*of char, 72=i inch, (10) */
-  int8_t blanks;      /*no of spaces before this char (1) */
-  uint8_t formatting; /*char formatting (0) */
-};
-
-/**********************************************************************
- * ETEXT_DESC
- * Description of the output of the OCR engine.
- * This structure is used as both a progress monitor and the final
- * output header, since it needs to be a valid progress monitor while
- * the OCR engine is storing its output to shared memory.
- * During progress, all the buffer info is -1.
- * Progress starts at 0 and increases to 100 during OCR. No other constraint.
- * Additionally the progress callback contains the bounding box of the word that
- * is currently being processed.
- * Every progress callback, the OCR engine must set ocr_alive to 1.
- * The HP side will set ocr_alive to 0. Repeated failure to reset
- * to 1 indicates that the OCR engine is dead.
- * If the cancel function is not null then it is called with the number of
- * user words found. If it returns true then operation is cancelled.
- **********************************************************************/
-class ETEXT_DESC;
-
-using CANCEL_FUNC = bool (*)(void *, int);
-using PROGRESS_FUNC = bool (*)(int, int, int, int, int);
-using PROGRESS_FUNC2 = bool (*)(ETEXT_DESC *, int, int, int, int);
-
-class ETEXT_DESC { // output header
-public:
-  int16_t count{0};    /// chars in this buffer(0)
-  int16_t progress{0}; /// percent complete increasing (0-100)
-  /** Progress monitor covers word recognition and it does not cover layout
-   * analysis.
-   * See Ray comment in https://github.com/tesseract-ocr/tesseract/pull/27 */
-  int8_t more_to_come{0};       /// true if not last
-  volatile int8_t ocr_alive{0}; /// ocr sets to 1, HP 0
-  int8_t err_code{0};           /// for errcode use
-  CANCEL_FUNC cancel{nullptr};  /// returns true to cancel
-  PROGRESS_FUNC progress_callback{
-      nullptr};                      /// called whenever progress increases
-  PROGRESS_FUNC2 progress_callback2; /// monitor-aware progress callback
-  void *cancel_this{nullptr};        /// this or other data for cancel
-  std::chrono::steady_clock::time_point end_time;
-  /// Time to stop. Expected to be set only
-  /// by call to set_deadline_msecs().
-  EANYCODE_CHAR text[1]{}; /// character data
-
-  ETEXT_DESC() : progress_callback2(&default_progress_func) {
-    end_time = std::chrono::time_point<std::chrono::steady_clock,
-                                       std::chrono::milliseconds>();
-  }
-
-  // Sets the end time to be deadline_msecs milliseconds from now.
-  void set_deadline_msecs(int32_t deadline_msecs) {
-    if (deadline_msecs > 0) {
-      end_time = std::chrono::steady_clock::now() +
-                 std::chrono::milliseconds(deadline_msecs);
-    }
-  }
-
-  // Returns false if we've not passed the end_time, or have not set a deadline.
-  bool deadline_exceeded() const {
-    if (end_time.time_since_epoch() ==
-        std::chrono::steady_clock::duration::zero()) {
-      return false;
-    }
-    auto now = std::chrono::steady_clock::now();
-    return (now > end_time);
-  }
-
-private:
-  static bool default_progress_func(ETEXT_DESC *ths, int left, int right,
-                                    int top, int bottom) {
-    if (ths->progress_callback != nullptr) {
-      return (*(ths->progress_callback))(ths->progress, left, right, top,
-                                         bottom);
-    }
-    return true;
-  }
-};
-
-} // namespace tesseract
-
-#endif // CCUTIL_OCRCLASS_H_
--- a/third_party/ocr/tesseract-ocr/kylin/aarch64/include/tesseract/osdetect.h
+++ b/third_party/ocr/tesseract-ocr/kylin/aarch64/include/tesseract/osdetect.h
@ -1,139 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// File:        osdetect.h
-// Description: Orientation and script detection.
-// Author:      Samuel Charron
-//              Ranjith Unnikrishnan
-//
-// (C) Copyright 2008, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef TESSERACT_CCMAIN_OSDETECT_H_
-#define TESSERACT_CCMAIN_OSDETECT_H_
-
-#include "export.h" // for TESS_API
-
-#include <vector> // for std::vector
-
-namespace tesseract {
-
-class BLOBNBOX;
-class BLOBNBOX_CLIST;
-class BLOB_CHOICE_LIST;
-class TO_BLOCK_LIST;
-class UNICHARSET;
-
-class Tesseract;
-
-// Max number of scripts in ICU + "NULL" + Japanese and Korean + Fraktur
-const int kMaxNumberOfScripts = 116 + 1 + 2 + 1;
-
-struct OSBestResult {
-  OSBestResult()
-      : orientation_id(0), script_id(0), sconfidence(0.0), oconfidence(0.0) {}
-  int orientation_id;
-  int script_id;
-  float sconfidence;
-  float oconfidence;
-};
-
-struct OSResults {
-  OSResults() : unicharset(nullptr) {
-    for (int i = 0; i < 4; ++i) {
-      for (int j = 0; j < kMaxNumberOfScripts; ++j) {
-        scripts_na[i][j] = 0;
-      }
-      orientations[i] = 0;
-    }
-  }
-  void update_best_orientation();
-  // Set the estimate of the orientation to the given id.
-  void set_best_orientation(int orientation_id);
-  // Update/Compute the best estimate of the script assuming the given
-  // orientation id.
-  void update_best_script(int orientation_id);
-  // Return the index of the script with the highest score for this orientation.
-  TESS_API int get_best_script(int orientation_id) const;
-  // Accumulate scores with given OSResults instance and update the best script.
-  void accumulate(const OSResults &osr);
-
-  // Print statistics.
-  void print_scores(void) const;
-  void print_scores(int orientation_id) const;
-
-  // Array holding scores for each orientation id [0,3].
-  // Orientation ids [0..3] map to [0, 270, 180, 90] degree orientations of the
-  // page respectively, where the values refer to the amount of clockwise
-  // rotation to be applied to the page for the text to be upright and readable.
-  float orientations[4];
-  // Script confidence scores for each of 4 possible orientations.
-  float scripts_na[4][kMaxNumberOfScripts];
-
-  UNICHARSET *unicharset;
-  OSBestResult best_result;
-};
-
-class OrientationDetector {
-public:
-  OrientationDetector(const std::vector<int> *allowed_scripts,
-                      OSResults *results);
-  bool detect_blob(BLOB_CHOICE_LIST *scores);
-  int get_orientation();
-
-private:
-  OSResults *osr_;
-  const std::vector<int> *allowed_scripts_;
-};
-
-class ScriptDetector {
-public:
-  ScriptDetector(const std::vector<int> *allowed_scripts, OSResults *osr,
-                 tesseract::Tesseract *tess);
-  void detect_blob(BLOB_CHOICE_LIST *scores);
-  bool must_stop(int orientation) const;
-
-private:
-  OSResults *osr_;
-  static const char *korean_script_;
-  static const char *japanese_script_;
-  static const char *fraktur_script_;
-  int korean_id_;
-  int japanese_id_;
-  int katakana_id_;
-  int hiragana_id_;
-  int han_id_;
-  int hangul_id_;
-  int latin_id_;
-  int fraktur_id_;
-  tesseract::Tesseract *tess_;
-  const std::vector<int> *allowed_scripts_;
-};
-
-int orientation_and_script_detection(const char *filename, OSResults *,
-                                     tesseract::Tesseract *);
-
-int os_detect(TO_BLOCK_LIST *port_blocks, OSResults *osr,
-              tesseract::Tesseract *tess);
-
-int os_detect_blobs(const std::vector<int> *allowed_scripts,
-                    BLOBNBOX_CLIST *blob_list, OSResults *osr,
-                    tesseract::Tesseract *tess);
-
-bool os_detect_blob(BLOBNBOX *bbox, OrientationDetector *o, ScriptDetector *s,
-                    OSResults *, tesseract::Tesseract *tess);
-
-// Helper method to convert an orientation index to its value in degrees.
-// The value represents the amount of clockwise rotation in degrees that must be
-// applied for the text to be upright (readable).
-TESS_API int OrientationIdToValue(const int &id);
-
-} // namespace tesseract
-
-#endif // TESSERACT_CCMAIN_OSDETECT_H_
--- a/third_party/ocr/tesseract-ocr/kylin/aarch64/include/tesseract/pageiterator.h
+++ b/third_party/ocr/tesseract-ocr/kylin/aarch64/include/tesseract/pageiterator.h
@ -1,364 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// File:        pageiterator.h
-// Description: Iterator for tesseract page structure that avoids using
-//              tesseract internal data structures.
-// Author:      Ray Smith
-//
-// (C) Copyright 2010, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef TESSERACT_CCMAIN_PAGEITERATOR_H_
-#define TESSERACT_CCMAIN_PAGEITERATOR_H_
-
-#include "export.h"
-#include "publictypes.h"
-
-struct Pix;
-struct Pta;
-
-namespace tesseract {
-
-struct BlamerBundle;
-class C_BLOB_IT;
-class PAGE_RES;
-class PAGE_RES_IT;
-class WERD;
-
-class Tesseract;
-
-/**
- * Class to iterate over tesseract page structure, providing access to all
- * levels of the page hierarchy, without including any tesseract headers or
- * having to handle any tesseract structures.
- * WARNING! This class points to data held within the TessBaseAPI class, and
- * therefore can only be used while the TessBaseAPI class still exists and
- * has not been subjected to a call of Init, SetImage, Recognize, Clear, End
- * DetectOS, or anything else that changes the internal PAGE_RES.
- * See tesseract/publictypes.h for the definition of PageIteratorLevel.
- * See also ResultIterator, derived from PageIterator, which adds in the
- * ability to access OCR output with text-specific methods.
- */
-
-class TESS_API PageIterator {
-public:
-  /**
-   * page_res and tesseract come directly from the BaseAPI.
-   * The rectangle parameters are copied indirectly from the Thresholder,
-   * via the BaseAPI. They represent the coordinates of some rectangle in an
-   * original image (in top-left-origin coordinates) and therefore the top-left
-   * needs to be added to any output boxes in order to specify coordinates
-   * in the original image. See TessBaseAPI::SetRectangle.
-   * The scale and scaled_yres are in case the Thresholder scaled the image
-   * rectangle prior to thresholding. Any coordinates in tesseract's image
-   * must be divided by scale before adding (rect_left, rect_top).
-   * The scaled_yres indicates the effective resolution of the binary image
-   * that tesseract has been given by the Thresholder.
-   * After the constructor, Begin has already been called.
-   */
-  PageIterator(PAGE_RES *page_res, Tesseract *tesseract, int scale,
-               int scaled_yres, int rect_left, int rect_top, int rect_width,
-               int rect_height);
-  virtual ~PageIterator();
-
-  /**
-   * Page/ResultIterators may be copied! This makes it possible to iterate over
-   * all the objects at a lower level, while maintaining an iterator to
-   * objects at a higher level. These constructors DO NOT CALL Begin, so
-   * iterations will continue from the location of src.
-   */
-  PageIterator(const PageIterator &src);
-  const PageIterator &operator=(const PageIterator &src);
-
-  /** Are we positioned at the same location as other? */
-  bool PositionedAtSameWord(const PAGE_RES_IT *other) const;
-
-  // ============= Moving around within the page ============.
-
-  /**
-   * Moves the iterator to point to the start of the page to begin an
-   * iteration.
-   */
-  virtual void Begin();
-
-  /**
-   * Moves the iterator to the beginning of the paragraph.
-   * This class implements this functionality by moving it to the zero indexed
-   * blob of the first (leftmost) word on the first row of the paragraph.
-   */
-  virtual void RestartParagraph();
-
-  /**
-   * Return whether this iterator points anywhere in the first textline of a
-   * paragraph.
-   */
-  bool IsWithinFirstTextlineOfParagraph() const;
-
-  /**
-   * Moves the iterator to the beginning of the text line.
-   * This class implements this functionality by moving it to the zero indexed
-   * blob of the first (leftmost) word of the row.
-   */
-  virtual void RestartRow();
-
-  /**
-   * Moves to the start of the next object at the given level in the
-   * page hierarchy, and returns false if the end of the page was reached.
-   * NOTE that RIL_SYMBOL will skip non-text blocks, but all other
-   * PageIteratorLevel level values will visit each non-text block once.
-   * Think of non text blocks as containing a single para, with a single line,
-   * with a single imaginary word.
-   * Calls to Next with different levels may be freely intermixed.
-   * This function iterates words in right-to-left scripts correctly, if
-   * the appropriate language has been loaded into Tesseract.
-   */
-  virtual bool Next(PageIteratorLevel level);
-
-  /**
-   * Returns true if the iterator is at the start of an object at the given
-   * level.
-   *
-   * For instance, suppose an iterator it is pointed to the first symbol of the
-   * first word of the third line of the second paragraph of the first block in
-   * a page, then:
-   *   it.IsAtBeginningOf(RIL_BLOCK) = false
-   *   it.IsAtBeginningOf(RIL_PARA) = false
-   *   it.IsAtBeginningOf(RIL_TEXTLINE) = true
-   *   it.IsAtBeginningOf(RIL_WORD) = true
-   *   it.IsAtBeginningOf(RIL_SYMBOL) = true
-   */
-  virtual bool IsAtBeginningOf(PageIteratorLevel level) const;
-
-  /**
-   * Returns whether the iterator is positioned at the last element in a
-   * given level. (e.g. the last word in a line, the last line in a block)
-   *
-   *     Here's some two-paragraph example
-   *   text.  It starts off innocuously
-   *   enough but quickly turns bizarre.
-   *     The author inserts a cornucopia
-   *   of words to guard against confused
-   *   references.
-   *
-   * Now take an iterator it pointed to the start of "bizarre."
-   *  it.IsAtFinalElement(RIL_PARA, RIL_SYMBOL) = false
-   *  it.IsAtFinalElement(RIL_PARA, RIL_WORD) = true
-   *  it.IsAtFinalElement(RIL_BLOCK, RIL_WORD) = false
-   */
-  virtual bool IsAtFinalElement(PageIteratorLevel level,
-                                PageIteratorLevel element) const;
-
-  /**
-   * Returns whether this iterator is positioned
-   *   before other:   -1
-   *   equal to other:  0
-   *   after other:     1
-   */
-  int Cmp(const PageIterator &other) const;
-
-  // ============= Accessing data ==============.
-  // Coordinate system:
-  // Integer coordinates are at the cracks between the pixels.
-  // The top-left corner of the top-left pixel in the image is at (0,0).
-  // The bottom-right corner of the bottom-right pixel in the image is at
-  // (width, height).
-  // Every bounding box goes from the top-left of the top-left contained
-  // pixel to the bottom-right of the bottom-right contained pixel, so
-  // the bounding box of the single top-left pixel in the image is:
-  // (0,0)->(1,1).
-  // If an image rectangle has been set in the API, then returned coordinates
-  // relate to the original (full) image, rather than the rectangle.
-
-  /**
-   * Controls what to include in a bounding box. Bounding boxes of all levels
-   * between RIL_WORD and RIL_BLOCK can include or exclude potential diacritics.
-   * Between layout analysis and recognition, it isn't known where all
-   * diacritics belong, so this control is used to include or exclude some
-   * diacritics that are above or below the main body of the word. In most cases
-   * where the placement is obvious, and after recognition, it doesn't make as
-   * much difference, as the diacritics will already be included in the word.
-   */
-  void SetBoundingBoxComponents(bool include_upper_dots,
-                                bool include_lower_dots) {
-    include_upper_dots_ = include_upper_dots;
-    include_lower_dots_ = include_lower_dots;
-  }
-
-  /**
-   * Returns the bounding rectangle of the current object at the given level.
-   * See comment on coordinate system above.
-   * Returns false if there is no such object at the current position.
-   * The returned bounding box is guaranteed to match the size and position
-   * of the image returned by GetBinaryImage, but may clip foreground pixels
-   * from a grey image. The padding argument to GetImage can be used to expand
-   * the image to include more foreground pixels. See GetImage below.
-   */
-  bool BoundingBox(PageIteratorLevel level, int *left, int *top, int *right,
-                   int *bottom) const;
-  bool BoundingBox(PageIteratorLevel level, int padding, int *left, int *top,
-                   int *right, int *bottom) const;
-  /**
-   * Returns the bounding rectangle of the object in a coordinate system of the
-   * working image rectangle having its origin at (rect_left_, rect_top_) with
-   * respect to the original image and is scaled by a factor scale_.
-   */
-  bool BoundingBoxInternal(PageIteratorLevel level, int *left, int *top,
-                           int *right, int *bottom) const;
-
-  /** Returns whether there is no object of a given level. */
-  bool Empty(PageIteratorLevel level) const;
-
-  /**
-   * Returns the type of the current block.
-   * See tesseract/publictypes.h for PolyBlockType.
-   */
-  PolyBlockType BlockType() const;
-
-  /**
-   * Returns the polygon outline of the current block. The returned Pta must
-   * be ptaDestroy-ed after use. Note that the returned Pta lists the vertices
-   * of the polygon, and the last edge is the line segment between the last
-   * point and the first point. nullptr will be returned if the iterator is
-   * at the end of the document or layout analysis was not used.
-   */
-  Pta *BlockPolygon() const;
-
-  /**
-   * Returns a binary image of the current object at the given level.
-   * The position and size match the return from BoundingBoxInternal, and so
-   * this could be upscaled with respect to the original input image.
-   * Use pixDestroy to delete the image after use.
-   */
-  Pix *GetBinaryImage(PageIteratorLevel level) const;
-
-  /**
-   * Returns an image of the current object at the given level in greyscale
-   * if available in the input. To guarantee a binary image use GetBinaryImage.
-   * NOTE that in order to give the best possible image, the bounds are
-   * expanded slightly over the binary connected component, by the supplied
-   * padding, so the top-left position of the returned image is returned
-   * in (left,top). These will most likely not match the coordinates
-   * returned by BoundingBox.
-   * If you do not supply an original image, you will get a binary one.
-   * Use pixDestroy to delete the image after use.
-   */
-  Pix *GetImage(PageIteratorLevel level, int padding, Pix *original_img,
-                int *left, int *top) const;
-
-  /**
-   * Returns the baseline of the current object at the given level.
-   * The baseline is the line that passes through (x1, y1) and (x2, y2).
-   * WARNING: with vertical text, baselines may be vertical!
-   * Returns false if there is no baseline at the current position.
-   */
-  bool Baseline(PageIteratorLevel level, int *x1, int *y1, int *x2,
-                int *y2) const;
-
-  // Returns the attributes of the current row.
-  void RowAttributes(float *row_height, float *descenders,
-                     float *ascenders) const;
-
-  /**
-   * Returns orientation for the block the iterator points to.
-   *   orientation, writing_direction, textline_order: see publictypes.h
-   *   deskew_angle: after rotating the block so the text orientation is
-   *                 upright, how many radians does one have to rotate the
-   *                 block anti-clockwise for it to be level?
-   *                   -Pi/4 <= deskew_angle <= Pi/4
-   */
-  void Orientation(tesseract::Orientation *orientation,
-                   tesseract::WritingDirection *writing_direction,
-                   tesseract::TextlineOrder *textline_order,
-                   float *deskew_angle) const;
-
-  /**
-   * Returns information about the current paragraph, if available.
-   *
-   *   justification -
-   *     LEFT if ragged right, or fully justified and script is left-to-right.
-   *     RIGHT if ragged left, or fully justified and script is right-to-left.
-   *     unknown if it looks like source code or we have very few lines.
-   *   is_list_item -
-   *     true if we believe this is a member of an ordered or unordered list.
-   *   is_crown -
-   *     true if the first line of the paragraph is aligned with the other
-   *     lines of the paragraph even though subsequent paragraphs have first
-   *     line indents.  This typically indicates that this is the continuation
-   *     of a previous paragraph or that it is the very first paragraph in
-   *     the chapter.
-   *   first_line_indent -
-   *     For LEFT aligned paragraphs, the first text line of paragraphs of
-   *     this kind are indented this many pixels from the left edge of the
-   *     rest of the paragraph.
-   *     for RIGHT aligned paragraphs, the first text line of paragraphs of
-   *     this kind are indented this many pixels from the right edge of the
-   *     rest of the paragraph.
-   *     NOTE 1: This value may be negative.
-   *     NOTE 2: if *is_crown == true, the first line of this paragraph is
-   *             actually flush, and first_line_indent is set to the "common"
-   *             first_line_indent for subsequent paragraphs in this block
-   *             of text.
-   */
-  void ParagraphInfo(tesseract::ParagraphJustification *justification,
-                     bool *is_list_item, bool *is_crown,
-                     int *first_line_indent) const;
-
-  // If the current WERD_RES (it_->word()) is not nullptr, sets the BlamerBundle
-  // of the current word to the given pointer (takes ownership of the pointer)
-  // and returns true.
-  // Can only be used when iterating on the word level.
-  bool SetWordBlamerBundle(BlamerBundle *blamer_bundle);
-
-protected:
-  /**
-   * Sets up the internal data for iterating the blobs of a new word, then
-   * moves the iterator to the given offset.
-   */
-  void BeginWord(int offset);
-
-  /** Pointer to the page_res owned by the API. */
-  PAGE_RES *page_res_;
-  /** Pointer to the Tesseract object owned by the API. */
-  Tesseract *tesseract_;
-  /**
-   * The iterator to the page_res_. Owned by this ResultIterator.
-   * A pointer just to avoid dragging in Tesseract includes.
-   */
-  PAGE_RES_IT *it_;
-  /**
-   * The current input WERD being iterated. If there is an output from OCR,
-   * then word_ is nullptr. Owned by the API
-   */
-  WERD *word_;
-  /** The length of the current word_. */
-  int word_length_;
-  /** The current blob index within the word. */
-  int blob_index_;
-  /**
-   * Iterator to the blobs within the word. If nullptr, then we are iterating
-   * OCR results in the box_word.
-   * Owned by this ResultIterator.
-   */
-  C_BLOB_IT *cblob_it_;
-  /** Control over what to include in bounding boxes. */
-  bool include_upper_dots_;
-  bool include_lower_dots_;
-  /** Parameters saved from the Thresholder. Needed to rebuild coordinates.*/
-  int scale_;
-  int scaled_yres_;
-  int rect_left_;
-  int rect_top_;
-  int rect_width_;
-  int rect_height_;
-};
-
-} // namespace tesseract.
-
-#endif // TESSERACT_CCMAIN_PAGEITERATOR_H_
--- a/third_party/ocr/tesseract-ocr/kylin/aarch64/include/tesseract/publictypes.h
+++ b/third_party/ocr/tesseract-ocr/kylin/aarch64/include/tesseract/publictypes.h
@ -1,281 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// File:        publictypes.h
-// Description: Types used in both the API and internally
-// Author:      Ray Smith
-//
-// (C) Copyright 2010, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef TESSERACT_CCSTRUCT_PUBLICTYPES_H_
-#define TESSERACT_CCSTRUCT_PUBLICTYPES_H_
-
-namespace tesseract {
-
-// This file contains types that are used both by the API and internally
-// to Tesseract. In order to decouple the API from Tesseract and prevent cyclic
-// dependencies, THIS FILE SHOULD NOT DEPEND ON ANY OTHER PART OF TESSERACT.
-// Restated: It is OK for low-level Tesseract files to include publictypes.h,
-// but not for the low-level tesseract code to include top-level API code.
-// This file should not use other Tesseract types, as that would drag
-// their includes into the API-level.
-
-/**
- * Number of printers' points in an inch. This is the unit in which point
- * sizes are returned.
- */
-constexpr int kPointsPerInch = 72;
-/**
- * Minimum believable resolution. Used as a default if there is no other
- * information, as it is safer to under-estimate than over-estimate.
- * NOTE(review): the unit is presumably pixels per inch -- confirm.
- */
-constexpr int kMinCredibleResolution = 70;
-/**
- * Maximum believable resolution.
- * NOTE(review): same (presumed) unit as kMinCredibleResolution -- confirm.
- */
-constexpr int kMaxCredibleResolution = 2400;
-/**
- * Ratio between median blob size and likely resolution. Used to estimate
- * resolution when none is provided. This is basically 1 / (usual text size
- * in inches).
- */
-constexpr int kResolutionEstimationFactor = 10;
-
-/**
- * Possible types for a POLY_BLOCK or ColPartition.
- * Must be kept in sync with kPBColors in polyblk.cpp and PTIs*Type functions
- * below, as well as kPolyBlockNames in layout_test.cc.
- * Used extensively by ColPartition, and POLY_BLOCK.
- */
-enum PolyBlockType {
-  PT_UNKNOWN,         // Type is not yet known. Keep as the first element.
-  PT_FLOWING_TEXT,    // Text that lives inside a column.
-  PT_HEADING_TEXT,    // Text that spans more than one column.
-  PT_PULLOUT_TEXT,    // Text that is in a cross-column pull-out region.
-  PT_EQUATION,        // Partition belonging to an equation region.
-  PT_INLINE_EQUATION, // Partition has inline equation.
-  PT_TABLE,           // Partition belonging to a table region.
-  PT_VERTICAL_TEXT,   // Text-line runs vertically.
-  PT_CAPTION_TEXT,    // Text that belongs to an image.
-  PT_FLOWING_IMAGE,   // Image that lives inside a column.
-  PT_HEADING_IMAGE,   // Image that spans more than one column.
-  PT_PULLOUT_IMAGE,   // Image that is in a cross-column pull-out region.
-  PT_HORZ_LINE,       // Horizontal Line.
-  PT_VERT_LINE,       // Vertical Line.
-  PT_NOISE,           // Lies outside of any column.
-  PT_COUNT            // Number of block types above. Keep as the last element.
-};
-
-/** Returns true if PolyBlockType is a line type (horizontal or vertical). */
-inline bool PTIsLineType(PolyBlockType type) {
-  switch (type) {
-    case PT_HORZ_LINE:
-    case PT_VERT_LINE:
-      return true;
-    default:
-      return false;
-  }
-}
-/**
- * Returns true if PolyBlockType is one of the image types
- * (flowing, heading or pull-out image).
- */
-inline bool PTIsImageType(PolyBlockType type) {
-  switch (type) {
-    case PT_FLOWING_IMAGE:
-    case PT_HEADING_IMAGE:
-    case PT_PULLOUT_IMAGE:
-      return true;
-    default:
-      return false;
-  }
-}
-/**
- * Returns true if PolyBlockType is one of the text types. Tables and inline
- * equations are treated as text here; PT_EQUATION is not.
- */
-inline bool PTIsTextType(PolyBlockType type) {
-  switch (type) {
-    case PT_FLOWING_TEXT:
-    case PT_HEADING_TEXT:
-    case PT_PULLOUT_TEXT:
-    case PT_INLINE_EQUATION:
-    case PT_TABLE:
-    case PT_VERTICAL_TEXT:
-    case PT_CAPTION_TEXT:
-      return true;
-    default:
-      return false;
-  }
-}
-/**
- * Returns true if PolyBlockType is a pull-out (inter-column) type, i.e. a
- * pull-out image or pull-out text.
- */
-inline bool PTIsPulloutType(PolyBlockType type) {
-  if (type == PT_PULLOUT_TEXT) {
-    return true;
-  }
-  return type == PT_PULLOUT_IMAGE;
-}
-
-/**
- *  +------------------+  Orientation Example:
- *  | 1 Aaaa Aaaa Aaaa |  ====================
- *  | Aaa aa aaa aa    |  To left is a diagram of some (1) English and
- *  | aaaaaa A aa aaa. |  (2) Chinese text and a (3) photo credit.
- *  |                2 |
- *  |   #######  c c C |  Upright Latin characters are represented as A and a.
- *  |   #######  c c c |  '<' represents a latin character rotated
- *  | < #######  c c c |      anti-clockwise 90 degrees.
- *  | < #######  c   c |
- *  | < #######  .   c |  Upright Chinese characters are represented C and c.
- *  | 3 #######      c |
- *  +------------------+  NOTA BENE: enum values here should match goodoc.proto
- *
- * If you orient your head so that "up" aligns with Orientation,
- * then the characters will appear "right side up" and readable.
- *
- * In the example above, both the English and Chinese paragraphs are oriented
- * so their "up" is the top of the page (page up).  The photo credit is read
- * with one's head turned leftward ("up" is to page left).
- *
- * The values of this enum match the convention of Tesseract's osdetect.h
- */
-enum Orientation {
-  ORIENTATION_PAGE_UP = 0,    // Text "up" points to the top of the page.
-  ORIENTATION_PAGE_RIGHT = 1, // Text "up" points to the right of the page.
-  ORIENTATION_PAGE_DOWN = 2,  // Text "up" points to the bottom of the page.
-  ORIENTATION_PAGE_LEFT = 3,  // Text "up" points to the left of the page.
-};
-
-/**
- * The grapheme clusters within a line of text are laid out logically
- * in this direction, judged when looking at the text line rotated so that
- * its Orientation is "page up".
- *
- * For English text, the writing direction is left-to-right.  For the
- * Chinese text in the above example, the writing direction is top-to-bottom.
- */
-enum WritingDirection {
-  WRITING_DIRECTION_LEFT_TO_RIGHT = 0, // E.g. English text.
-  WRITING_DIRECTION_RIGHT_TO_LEFT = 1,
-  WRITING_DIRECTION_TOP_TO_BOTTOM = 2, // E.g. vertically written Chinese text.
-};
-
-/**
- * The text lines are read in the given sequence.
- *
- * In English, the order is top-to-bottom.
- * In Chinese, vertical text lines are read right-to-left.  Mongolian is
- * written in vertical columns top to bottom like Chinese, but the lines
- * order left-to right.
- *
- * Note that only some combinations make sense.  For example,
- * WRITING_DIRECTION_LEFT_TO_RIGHT implies TEXTLINE_ORDER_TOP_TO_BOTTOM
- */
-enum TextlineOrder {
-  TEXTLINE_ORDER_LEFT_TO_RIGHT = 0, // E.g. Mongolian vertical columns.
-  TEXTLINE_ORDER_RIGHT_TO_LEFT = 1, // E.g. vertical Chinese text lines.
-  TEXTLINE_ORDER_TOP_TO_BOTTOM = 2, // E.g. English.
-};
-
-/**
- * Possible modes for page layout analysis. These *must* be kept in order
- * of decreasing amount of layout analysis to be done, except for OSD_ONLY,
- * so that the inequality test macros below work.
- */
-enum PageSegMode {
-  // The numeric values are significant: the PSM_*_ENABLED() helpers in this
-  // file compare modes with <= and >=, so do not renumber or reorder.
-  PSM_OSD_ONLY = 0,      ///< Orientation and script detection only.
-  PSM_AUTO_OSD = 1,      ///< Automatic page segmentation with orientation and
-                         ///< script detection. (OSD)
-  PSM_AUTO_ONLY = 2,     ///< Automatic page segmentation, but no OSD, or OCR.
-  PSM_AUTO = 3,          ///< Fully automatic page segmentation, but no OSD.
-  PSM_SINGLE_COLUMN = 4, ///< Assume a single column of text of variable sizes.
-  PSM_SINGLE_BLOCK_VERT_TEXT = 5, ///< Assume a single uniform block of
-                                  ///< vertically aligned text.
-  PSM_SINGLE_BLOCK = 6, ///< Assume a single uniform block of text. (Default.)
-  PSM_SINGLE_LINE = 7,  ///< Treat the image as a single text line.
-  PSM_SINGLE_WORD = 8,  ///< Treat the image as a single word.
-  PSM_CIRCLE_WORD = 9,  ///< Treat the image as a single word in a circle.
-  PSM_SINGLE_CHAR = 10, ///< Treat the image as a single character.
-  PSM_SPARSE_TEXT =
-      11, ///< Find as much text as possible in no particular order.
-  PSM_SPARSE_TEXT_OSD = 12, ///< Sparse text with orientation and script det.
-  PSM_RAW_LINE = 13, ///< Treat the image as a single text line, bypassing
-                     ///< hacks that are Tesseract-specific.
-
-  PSM_COUNT ///< Number of enum entries.
-};
-
-/**
- * Inline functions that act on a PageSegMode to determine whether components of
- * layout analysis are enabled.
- * *Depend critically on the order of elements of PageSegMode.*
- * NOTE that arg is an int for compatibility with INT_PARAM.
- */
-// True for PSM_SPARSE_TEXT_OSD and for any mode value not above PSM_AUTO_OSD.
-inline bool PSM_OSD_ENABLED(int pageseg_mode) {
-  if (pageseg_mode == PSM_SPARSE_TEXT_OSD) {
-    return true;
-  }
-  return pageseg_mode <= PSM_AUTO_OSD;
-}
-// True for PSM_SPARSE_TEXT_OSD and for any mode value not above PSM_AUTO.
-inline bool PSM_ORIENTATION_ENABLED(int pageseg_mode) {
-  if (pageseg_mode == PSM_SPARSE_TEXT_OSD) {
-    return true;
-  }
-  return pageseg_mode <= PSM_AUTO;
-}
-// True when the mode lies in the closed range [PSM_AUTO_OSD, PSM_AUTO].
-inline bool PSM_COL_FIND_ENABLED(int pageseg_mode) {
-  const bool below_range = pageseg_mode < PSM_AUTO_OSD;
-  const bool above_range = pageseg_mode > PSM_AUTO;
-  return !(below_range || above_range);
-}
-// True for the two sparse-text modes, with or without OSD.
-inline bool PSM_SPARSE(int pageseg_mode) {
-  switch (pageseg_mode) {
-    case PSM_SPARSE_TEXT:
-    case PSM_SPARSE_TEXT_OSD:
-      return true;
-    default:
-      return false;
-  }
-}
-// True when the mode lies in the closed range
-// [PSM_AUTO_OSD, PSM_SINGLE_COLUMN].
-inline bool PSM_BLOCK_FIND_ENABLED(int pageseg_mode) {
-  if (pageseg_mode < PSM_AUTO_OSD) {
-    return false;
-  }
-  return pageseg_mode <= PSM_SINGLE_COLUMN;
-}
-// True when the mode lies in the closed range
-// [PSM_AUTO_OSD, PSM_SINGLE_BLOCK].
-inline bool PSM_LINE_FIND_ENABLED(int pageseg_mode) {
-  if (pageseg_mode > PSM_SINGLE_BLOCK) {
-    return false;
-  }
-  return pageseg_mode >= PSM_AUTO_OSD;
-}
-// True when the mode lies in the closed range [PSM_AUTO_OSD, PSM_SINGLE_LINE]
-// or is one of the two sparse-text modes.
-inline bool PSM_WORD_FIND_ENABLED(int pageseg_mode) {
-  const bool in_word_range =
-      pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_SINGLE_LINE;
-  const bool sparse_mode =
-      pageseg_mode == PSM_SPARSE_TEXT || pageseg_mode == PSM_SPARSE_TEXT_OSD;
-  return in_word_range || sparse_mode;
-}
-
-/**
- * enum of the elements of the page hierarchy, used in ResultIterator
- * to provide functions that operate on each level without having to
- * have 5x as many functions.
- */
-enum PageIteratorLevel {
-  // Listed from the coarsest element (block) to the finest (symbol); each
-  // level is contained in the one before it.
-  RIL_BLOCK,    // Block of text/image/separator line.
-  RIL_PARA,     // Paragraph within a block.
-  RIL_TEXTLINE, // Line within a paragraph.
-  RIL_WORD,     // Word within a textline.
-  RIL_SYMBOL    // Symbol/character within a word.
-};
-
-/**
- * JUSTIFICATION_UNKNOWN
- *   The alignment is not clearly one of the other options.  This could happen
- *   for example if there are only one or two lines of text or the text looks
- *   like source code or poetry.
- *
- * NOTA BENE: Fully justified paragraphs (text aligned to both left and right
- *    margins) are marked by Tesseract with JUSTIFICATION_LEFT if their text
- *    is written with a left-to-right script and with JUSTIFICATION_RIGHT if
- *    their text is written in a right-to-left script.
- *
- * Interpretation for text read in vertical lines:
- *   "Left" is wherever the starting reading position is.
- *
- * JUSTIFICATION_LEFT
- *   Each line, except possibly the first, is flush to the same left tab stop.
- *
- * JUSTIFICATION_CENTER
- *   The text lines of the paragraph are centered about a line going
- *   down through their middle of the text lines.
- *
- * JUSTIFICATION_RIGHT
- *   Each line, except possibly the first, is flush to the same right tab stop.
- */
-enum ParagraphJustification {
-  JUSTIFICATION_UNKNOWN, // Alignment is not clearly one of the options below.
-  JUSTIFICATION_LEFT,    // Lines are flush to the same left tab stop.
-  JUSTIFICATION_CENTER,  // Lines are centered about a common middle line.
-  JUSTIFICATION_RIGHT,   // Lines are flush to the same right tab stop.
-};
-
-/**
- * When Tesseract/Cube is initialized we can choose to instantiate/load/run
- * only the Tesseract part, only the Cube part or both along with the combiner.
- * The preference of which engine to use is stored in tessedit_ocr_engine_mode.
- *
- * ATTENTION: When modifying this enum, please make sure to make the
- * appropriate changes to all the enums mirroring it (e.g. OCREngine in
- * cityblock/workflow/detection/detection_storage.proto). Such enums will
- * mention the connection to OcrEngineMode in the comments.
- */
-enum OcrEngineMode {
-  OEM_TESSERACT_ONLY,          // Run Tesseract only - fastest; deprecated
-  OEM_LSTM_ONLY,               // Run just the LSTM line recognizer.
-  OEM_TESSERACT_LSTM_COMBINED, // Run the LSTM recognizer, but allow fallback
-                               // to Tesseract when things get difficult.
-                               // deprecated
-  OEM_DEFAULT,                 // Specify this mode when calling init_*(),
-                               // to indicate that any of the above modes
-                               // should be automatically inferred from the
-                               // variables in the language-specific config,
-                               // command-line configs, or if not specified
-                               // in any of the above should be set to the
-                               // default OEM_TESSERACT_ONLY.
-                               // NOTE(review): OEM_TESSERACT_ONLY is marked
-                               // deprecated above, so the stated fallback
-                               // may be stale -- confirm against the library.
-  OEM_COUNT                    // Number of OEMs
-};
-
-} // namespace tesseract.
-
-#endif // TESSERACT_CCSTRUCT_PUBLICTYPES_H_
--- a/third_party/ocr/tesseract-ocr/kylin/aarch64/include/tesseract/renderer.h
+++ b/third_party/ocr/tesseract-ocr/kylin/aarch64/include/tesseract/renderer.h
@ -1,311 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// File:        renderer.h
-// Description: Rendering interface to inject into TessBaseAPI
-//
-// (C) Copyright 2011, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef TESSERACT_API_RENDERER_H_
-#define TESSERACT_API_RENDERER_H_
-
-#include "export.h"
-
-// To avoid collision with other typenames include the ABSOLUTE MINIMUM
-// complexity of includes here. Use forward declarations wherever possible
-// and hide includes of complex types in baseapi.cpp.
-#include <cstdint>
-#include <string> // for std::string
-#include <vector> // for std::vector
-
-struct Pix;
-
-namespace tesseract {
-
-class TessBaseAPI;
-
-/**
- * Interface for rendering tesseract results into a document, such as text,
- * HOCR or pdf. This class is abstract. Specific classes handle individual
- * formats. This interface is then used to inject the renderer class into
- * tesseract when processing images.
- *
- * For simplicity implementing this with tesseract version 3.01,
- * the renderer contains document state that is cleared from document
- * to document just as the TessBaseAPI is. This way the base API can just
- * delegate its rendering functionality to injected renderers, and the
- * renderers can manage the associated state needed for the specific formats
- * in addition to the heuristics for producing it.
- */
-class TESS_API TessResultRenderer {
-public:
-  // Virtual so that concrete renderers can be destroyed through a pointer to
-  // this abstract base class.
-  virtual ~TessResultRenderer();
-
-  // Takes ownership of pointer so must be new'd instance.
-  // Renderers aren't ordered, but appends the sequences of next parameter
-  // and existing next(). The renderers should be unique across both lists.
-  void insert(TessResultRenderer *next);
-
-  // Returns the next renderer or nullptr.
-  TessResultRenderer *next() {
-    return next_;
-  }
-
-  /**
-   * Starts a new document with the given title.
-   * This clears the contents of the output data.
-   * Title should use UTF-8 encoding.
-   */
-  bool BeginDocument(const char *title);
-
-  /**
-   * Adds the recognized text from the source image to the current document.
-   * Invalid if BeginDocument not yet called.
-   *
-   * Note that this API is a bit weird but is designed to fit into the
-   * current TessBaseAPI implementation where the api has lots of state
-   * information that we might want to add in.
-   */
-  bool AddImage(TessBaseAPI *api);
-
-  /**
-   * Finishes the document and finalizes the output data.
-   * Invalid if BeginDocument not yet called.
-   */
-  bool EndDocument();
-
-  // Returns the stored extension pointer itself (no copy is made).
-  const char *file_extension() const {
-    return file_extension_;
-  }
-  // Returns the document title as a C string. The pointer refers to the
-  // internal buffer of title_, so it stays valid only while title_ is
-  // unchanged.
-  const char *title() const {
-    return title_.c_str();
-  }
-
-  // Is everything fine? Otherwise something went wrong.
-  bool happy() const {
-    return happy_;
-  }
-
-  /**
-   * Returns the index of the last image given to AddImage
-   * (i.e. images are incremented whether the image succeeded or not)
-   *
-   * This is always defined. It means either the number of the
-   * current image, the last image ended, or in the completed document
-   * depending on when in the document lifecycle you are looking at it.
-   * Will return -1 if a document was never started.
-   */
-  int imagenum() const {
-    return imagenum_;
-  }
-
-protected:
-  /**
-   * Called by concrete classes.
-   *
-   * outputbase is the name of the output file excluding
-   * extension. For example, "/path/to/chocolate-chip-cookie-recipe"
-   *
-   * extension indicates the file extension to be used for output
-   * files. For example "pdf" will produce a .pdf file, and "hocr"
-   * will produce .hocr files.
-   */
-  TessResultRenderer(const char *outputbase, const char *extension);
-
-  // Hook for specialized handling in BeginDocument()
-  virtual bool BeginDocumentHandler();
-
-  // This must be overridden to render the OCR'd results
-  // (pure virtual: it is what makes this class abstract).
-  virtual bool AddImageHandler(TessBaseAPI *api) = 0;
-
-  // Hook for specialized handling in EndDocument()
-  virtual bool EndDocumentHandler();
-
-  // Renderers can call this to append '\0' terminated strings into
-  // the output string returned by GetOutput.
-  // This method will grow the output buffer if needed.
-  // NOTE(review): no GetOutput is declared in this class; the only output
-  // member visible here is fout_ -- this comment may be stale, confirm.
-  void AppendString(const char *s);
-
-  // Renderers can call this to append binary byte sequences into
-  // the output string returned by GetOutput. Note that s is not necessarily
-  // '\0' terminated (and can contain '\0' within it).
-  // This method will grow the output buffer if needed.
-  // len is the number of bytes to take from s.
-  void AppendData(const char *s, int len);
-
-private:
-  TessResultRenderer *next_;   // Can link multiple renderers together
-  // NOTE(review): FILE is used here but this header does not include
-  // <cstdio> directly -- confirm it is provided by another include.
-  FILE *fout_;                 // output file pointer
-  const char *file_extension_; // standard extension for generated output
-  std::string title_;          // title of document being rendered
-  int imagenum_;               // index of last image added
-  bool happy_;                 // I get grumpy when the disk fills up, etc.
-};
-
-/**
- * Renders tesseract output into a plain UTF-8 text string.
- */
-class TESS_API TessTextRenderer : public TessResultRenderer {
-public:
-  // outputbase: presumably the output file name without extension, as
-  // described for the TessResultRenderer constructor -- confirm in the .cpp.
-  explicit TessTextRenderer(const char *outputbase);
-
-protected:
-  // Implements the pure virtual rendering hook of TessResultRenderer.
-  bool AddImageHandler(TessBaseAPI *api) override;
-};
-
-/**
- * Renders tesseract output into an hocr text string.
- */
-class TESS_API TessHOcrRenderer : public TessResultRenderer {
-public:
-  // font_info: presumably stored in font_info_ (whether to print font
-  // information) -- confirm in the .cpp.
-  explicit TessHOcrRenderer(const char *outputbase, bool font_info);
-  // NOTE(review): the font_info value used by this overload is not visible
-  // in the header.
-  explicit TessHOcrRenderer(const char *outputbase);
-
-protected:
-  // Overrides of the three TessResultRenderer document hooks.
-  bool BeginDocumentHandler() override;
-  bool AddImageHandler(TessBaseAPI *api) override;
-  bool EndDocumentHandler() override;
-
-private:
-  bool font_info_; // whether to print font information
-};
-
-/**
- * Renders tesseract output into an alto text string.
- */
-class TESS_API TessAltoRenderer : public TessResultRenderer {
-public:
-  explicit TessAltoRenderer(const char *outputbase);
-
-protected:
-  // Overrides of the three TessResultRenderer document hooks.
-  bool BeginDocumentHandler() override;
-  bool AddImageHandler(TessBaseAPI *api) override;
-  bool EndDocumentHandler() override;
-
-private:
-  // NOTE(review): the meaning of this flag is not visible in the header, and
-  // unlike the other renderer members it has no trailing underscore.
-  bool begin_document;
-};
-
-/**
- * Renders Tesseract output into a TSV string.
- */
-class TESS_API TessTsvRenderer : public TessResultRenderer {
-public:
-  // font_info: presumably stored in font_info_ (whether to print font
-  // information) -- confirm in the .cpp.
-  explicit TessTsvRenderer(const char *outputbase, bool font_info);
-  // NOTE(review): the font_info value used by this overload is not visible
-  // in the header.
-  explicit TessTsvRenderer(const char *outputbase);
-
-protected:
-  // Overrides of the three TessResultRenderer document hooks.
-  bool BeginDocumentHandler() override;
-  bool AddImageHandler(TessBaseAPI *api) override;
-  bool EndDocumentHandler() override;
-
-private:
-  bool font_info_; // whether to print font information
-};
-
-/**
- * Renders tesseract output into searchable PDF.
- */
-class TESS_API TessPDFRenderer : public TessResultRenderer {
-public:
-  // datadir is the location of the TESSDATA. We need it because
-  // we load a custom PDF font from this location.
-  // textonly defaults to false; per the textonly_ member, images are skipped
-  // when it is set.
-  TessPDFRenderer(const char *outputbase, const char *datadir,
-                  bool textonly = false);
-
-protected:
-  // Overrides of the three TessResultRenderer document hooks.
-  bool BeginDocumentHandler() override;
-  bool AddImageHandler(TessBaseAPI *api) override;
-  bool EndDocumentHandler() override;
-
-private:
-  // We don't want to have every image in memory at once,
-  // so we store some metadata as we go along producing
-  // PDFs one page at a time. At the end, that metadata is
-  // used to make everything that isn't easily handled in a
-  // streaming fashion.
-  long int obj_;                  // counter for PDF objects
-  std::vector<uint64_t> offsets_; // offset of every PDF object in bytes
-  std::vector<long int> pages_;   // object number for every /Page object
-  std::string datadir_;           // where to find the custom font
-  bool textonly_;                 // skip images if set
-  // Bookkeeping only. DIY = Do It Yourself.
-  void AppendPDFObjectDIY(size_t objectsize);
-  // Bookkeeping + emit data.
-  void AppendPDFObject(const char *data);
-  // Create the /Contents object for an entire page.
-  // NOTE(review): returns a raw char*; who frees it is not visible here.
-  char *GetPDFTextObjects(TessBaseAPI *api, double width, double height);
-  // Turn an image into a PDF object. Only transcode if we have to.
-  // pdf_object and pdf_object_size look like output parameters.
-  // NOTE(review): ownership of *pdf_object is not visible here -- confirm.
-  static bool imageToPDFObj(Pix *pix, const char *filename, long int objnum,
-                            char **pdf_object, long int *pdf_object_size,
-                            int jpg_quality);
-};
-
-/**
- * Renders tesseract output into a plain UTF-8 text string
- * NOTE(review): this description is word-for-word the same as the one on
- * TessTextRenderer; going by the class name this renderer presumably emits
- * the UNLV format instead -- confirm against the implementation.
- */
-class TESS_API TessUnlvRenderer : public TessResultRenderer {
-public:
-  explicit TessUnlvRenderer(const char *outputbase);
-
-protected:
-  // Implements the pure virtual rendering hook of TessResultRenderer.
-  bool AddImageHandler(TessBaseAPI *api) override;
-};
-
-/**
- * Renders tesseract output into a plain UTF-8 text string for LSTMBox.
- */
-class TESS_API TessLSTMBoxRenderer : public TessResultRenderer {
-public:
-  explicit TessLSTMBoxRenderer(const char *outputbase);
-
-protected:
-  // Implements the pure virtual rendering hook of TessResultRenderer.
-  bool AddImageHandler(TessBaseAPI *api) override;
-};
-
-/**
- * Renders tesseract output into a plain UTF-8 text string
- * NOTE(review): this description is word-for-word the same as the one on
- * TessTextRenderer; going by the class name this renderer presumably emits
- * box-file text -- confirm against the implementation.
- */
-class TESS_API TessBoxTextRenderer : public TessResultRenderer {
-public:
-  explicit TessBoxTextRenderer(const char *outputbase);
-
-protected:
-  // Implements the pure virtual rendering hook of TessResultRenderer.
-  bool AddImageHandler(TessBaseAPI *api) override;
-};
-
-/**
- * Renders tesseract output into a plain UTF-8 text string in WordStr format.
- */
-class TESS_API TessWordStrBoxRenderer : public TessResultRenderer {
-public:
-  explicit TessWordStrBoxRenderer(const char *outputbase);
-
-protected:
-  // Implements the pure virtual rendering hook of TessResultRenderer.
-  bool AddImageHandler(TessBaseAPI *api) override;
-};
-
-#ifndef DISABLED_LEGACY_ENGINE
-
-/**
- * Renders tesseract output into an osd text string.
- * Only declared when DISABLED_LEGACY_ENGINE is not defined.
- */
-class TESS_API TessOsdRenderer : public TessResultRenderer {
-public:
-  explicit TessOsdRenderer(const char *outputbase);
-
-protected:
-  // Implements the pure virtual rendering hook of TessResultRenderer.
-  bool AddImageHandler(TessBaseAPI *api) override;
-};
-
-#endif // ndef DISABLED_LEGACY_ENGINE
-
-} // namespace tesseract.
-
-#endif // TESSERACT_API_RENDERER_H_
--- a/third_party/ocr/tesseract-ocr/kylin/aarch64/include/tesseract/resultiterator.h
+++ b/third_party/ocr/tesseract-ocr/kylin/aarch64/include/tesseract/resultiterator.h
@ -1,250 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// File:        resultiterator.h
-// Description: Iterator for tesseract results that is capable of
-//              iterating in proper reading order over Bi Directional
-//              (e.g. mixed Hebrew and English) text.
-// Author:      David Eger
-//
-// (C) Copyright 2011, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef TESSERACT_CCMAIN_RESULT_ITERATOR_H_
-#define TESSERACT_CCMAIN_RESULT_ITERATOR_H_
-
-#include "export.h"            // for TESS_API, TESS_LOCAL
-#include "ltrresultiterator.h" // for LTRResultIterator
-#include "publictypes.h"       // for PageIteratorLevel
-#include "unichar.h"           // for StrongScriptDirection
-
-#include <set>    // for std::pair
-#include <vector> // for std::vector
-
-namespace tesseract {
-
-class TESS_API ResultIterator : public LTRResultIterator {
-public:
-  static ResultIterator *StartOfParagraph(const LTRResultIterator &resit);
-
-  /**
-   * ResultIterator is copy constructible!
-   * The default copy constructor works just fine for us.
-   */
-  ~ResultIterator() override = default;
-
-  // ============= Moving around within the page ============.
-  /**
-   * Moves the iterator to point to the start of the page to begin
-   * an iteration.
-   */
-  void Begin() override;
-
-  /**
-   * Moves to the start of the next object at the given level in the
-   * page hierarchy in the appropriate reading order and returns false if
-   * the end of the page was reached.
-   * NOTE that RIL_SYMBOL will skip non-text blocks, but all other
-   * PageIteratorLevel level values will visit each non-text block once.
-   * Think of non text blocks as containing a single para, with a single line,
-   * with a single imaginary word.
-   * Calls to Next with different levels may be freely intermixed.
-   * This function iterates words in right-to-left scripts correctly, if
-   * the appropriate language has been loaded into Tesseract.
-   */
-  bool Next(PageIteratorLevel level) override;
-
-  /**
-   * IsAtBeginningOf() returns whether we're at the logical beginning of the
-   * given level.  (as opposed to ResultIterator's left-to-right top-to-bottom
-   * order).  Otherwise, this acts the same as PageIterator::IsAtBeginningOf().
-   * For a full description, see pageiterator.h
-   */
-  bool IsAtBeginningOf(PageIteratorLevel level) const override;
-
-  /**
-   * Implement PageIterator's IsAtFinalElement correctly in a BiDi context.
-   * For instance, IsAtFinalElement(RIL_PARA, RIL_WORD) returns whether we
-   * point at the last word in a paragraph.  See PageIterator for full comment.
-   */
-  bool IsAtFinalElement(PageIteratorLevel level,
-                        PageIteratorLevel element) const override;
-
-  // ============= Functions that refer to words only ============.
-  // Returns the number of blanks before the current word.
-  int BlanksBeforeWord() const;
-
-  // ============= Accessing data ==============.
-
-  /**
-   * Returns the null terminated UTF-8 encoded text string for the current
-   * object at the given level. Use delete [] to free after use.
-   */
-  virtual char *GetUTF8Text(PageIteratorLevel level) const;
-
-  /**
-   * Returns the LSTM choices for every LSTM timestep for the current word.
-   */
-  virtual std::vector<std::vector<std::vector<std::pair<const char *, float>>>>
-      *GetRawLSTMTimesteps() const;
-  virtual std::vector<std::vector<std::pair<const char *, float>>>
-      *GetBestLSTMSymbolChoices() const;
-
-  /**
-   * Return whether the current paragraph's dominant reading direction
-   * is left-to-right (as opposed to right-to-left).
-   */
-  bool ParagraphIsLtr() const;
-
-  // ============= Exposed only for testing =============.
-
-  /**
-   * Yields the reading order as a sequence of indices and (optional)
-   * meta-marks for a set of words (given left-to-right).
-   * The meta marks are passed as negative values:
-   *   kMinorRunStart  Start of minor direction text.
-   *   kMinorRunEnd    End of minor direction text.
-   *   kComplexWord    The next indexed word contains both left-to-right and
-   *                    right-to-left characters and was treated as neutral.
-   *
-   * For example, suppose we have five words in a text line,
-   * indexed [0,1,2,3,4] from the leftmost side of the text line.
-   * The following are all believable reading_orders:
-   *
-   * Left-to-Right (in ltr paragraph):
-   *     { 0, 1, 2, 3, 4 }
-   * Left-to-Right (in rtl paragraph):
-   *     { kMinorRunStart, 0, 1, 2, 3, 4, kMinorRunEnd }
-   * Right-to-Left (in rtl paragraph):
-   *     { 4, 3, 2, 1, 0 }
-   * Left-to-Right except for an RTL phrase in words 2, 3 in an ltr paragraph:
-   *     { 0, 1, kMinorRunStart, 3, 2, kMinorRunEnd, 4 }
-   */
-  static void CalculateTextlineOrder(
-      bool paragraph_is_ltr,
-      const std::vector<StrongScriptDirection> &word_dirs,
-      std::vector<int> *reading_order);
-
-  static const int kMinorRunStart;
-  static const int kMinorRunEnd;
-  static const int kComplexWord;
-
-protected:
-  /**
-   * We presume the data associated with the given iterator will outlive us.
-   * NB: This is private because it does something that is non-obvious:
-   *   it resets to the beginning of the paragraph instead of staying wherever
-   *   resit might have pointed.
-   */
-  explicit ResultIterator(const LTRResultIterator &resit);
-
-private:
-  /**
-   * Calculates the current paragraph's dominant writing direction.
-   * Typically, members should use current_paragraph_ltr_ instead.
-   */
-  bool CurrentParagraphIsLtr() const;
-
-  /**
-   * Returns word indices as measured from resit->RestartRow() = index 0
-   * for the reading order of words within a textline given an iterator
-   * into the middle of the text line.
-   * In addition to non-negative word indices, the following negative values
-   * may be inserted:
-   *   kMinorRunStart  Start of minor direction text.
-   *   kMinorRunEnd    End of minor direction text.
-   *   kComplexWord    The previous word contains both left-to-right and
-   *                   right-to-left characters and was treated as neutral.
-   */
-  void CalculateTextlineOrder(bool paragraph_is_ltr,
-                              const LTRResultIterator &resit,
-                              std::vector<int> *indices) const;
-  /** Same as above, but the caller's ssd gets filled in if ssd != nullptr. */
-  void CalculateTextlineOrder(bool paragraph_is_ltr,
-                              const LTRResultIterator &resit,
-                              std::vector<StrongScriptDirection> *ssd,
-                              std::vector<int> *indices) const;
-
-  /**
-   * What is the index of the current word in a strict left-to-right reading
-   * of the row?
-   */
-  int LTRWordIndex() const;
-
-  /**
-   * Given an iterator pointing at a word, returns the logical reading order
-   * of blob indices for the word.
-   */
-  void CalculateBlobOrder(std::vector<int> *blob_indices) const;
-
-  /** Precondition: current_paragraph_is_ltr_ is set. */
-  void MoveToLogicalStartOfTextline();
-
-  /**
-   * Precondition: current_paragraph_is_ltr_ and in_minor_direction_
-   * are set.
-   */
-  void MoveToLogicalStartOfWord();
-
-  /** Are we pointing at the final (reading order) symbol of the word? */
-  bool IsAtFinalSymbolOfWord() const;
-
-  /** Are we pointing at the first (reading order) symbol of the word? */
-  bool IsAtFirstSymbolOfWord() const;
-
-  /**
-   * Append any extra marks that should be appended to this word when printed.
-   * Mostly, these are Unicode BiDi control characters.
-   */
-  void AppendSuffixMarks(std::string *text) const;
-
-  /** Appends the current word in reading order to the given buffer.*/
-  void AppendUTF8WordText(std::string *text) const;
-
-  /**
-   * Appends the text of the current text line, *assuming this iterator is
-   * positioned at the beginning of the text line*  This function
-   * updates the iterator to point to the first position past the text line.
-   * Each textline is terminated in a single newline character.
-   * If the textline ends a paragraph, it gets a second terminal newline.
-   */
-  void IterateAndAppendUTF8TextlineText(std::string *text);
-
-  /**
-   * Appends the text of the current paragraph in reading order
-   * to the given buffer.
-   * Each textline is terminated in a single newline character, and the
-   * paragraph gets an extra newline at the end.
-   */
-  void AppendUTF8ParagraphText(std::string *text) const;
-
-  /** Returns whether the bidi_debug flag is set to at least min_level. */
-  bool BidiDebug(int min_level) const;
-
-  bool current_paragraph_is_ltr_;
-
-  /**
-   * Is the currently pointed-at character at the beginning of
-   * a minor-direction run?
-   */
-  bool at_beginning_of_minor_run_;
-
-  /** Is the currently pointed-at character in a minor-direction sequence? */
-  bool in_minor_direction_;
-
-  /**
-   * Should detected inter-word spaces be preserved, or "compressed" to a single
-   * space character (default behavior).
-   */
-  bool preserve_interword_spaces_;
-};
-
-} // namespace tesseract.
-
-#endif // TESSERACT_CCMAIN_RESULT_ITERATOR_H_
--- a/third_party/ocr/tesseract-ocr/kylin/aarch64/include/tesseract/unichar.h
+++ b/third_party/ocr/tesseract-ocr/kylin/aarch64/include/tesseract/unichar.h
@ -1,174 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// File:        unichar.h
-// Description: Unicode character/ligature class.
-// Author:      Ray Smith
-//
-// (C) Copyright 2006, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef TESSERACT_CCUTIL_UNICHAR_H_
-#define TESSERACT_CCUTIL_UNICHAR_H_
-
-#include "export.h"
-
-#include <memory.h>
-#include <cstring>
-#include <string>
-#include <vector>
-
-namespace tesseract {
-
-// Maximum number of characters that can be stored in a UNICHAR. Must be
-// at least 4. Must not exceed 31 without changing the coding of length.
-#define UNICHAR_LEN 30
-
-// A UNICHAR_ID is the unique id of a unichar.
-using UNICHAR_ID = int;
-
-// A variable to indicate an invalid or uninitialized unichar id.
-static const int INVALID_UNICHAR_ID = -1;
-// A special unichar that corresponds to INVALID_UNICHAR_ID.
-static const char INVALID_UNICHAR[] = "__INVALID_UNICHAR__";
-
-enum StrongScriptDirection {
-  DIR_NEUTRAL = 0,       // Text contains only neutral characters.
-  DIR_LEFT_TO_RIGHT = 1, // Text contains no Right-to-Left characters.
-  DIR_RIGHT_TO_LEFT = 2, // Text contains no Left-to-Right characters.
-  DIR_MIX = 3,           // Text contains a mixture of left-to-right
-                         // and right-to-left characters.
-};
-
-using char32 = signed int;
-
-// The UNICHAR class holds a single classification result. This may be
-// a single Unicode character (stored as between 1 and 4 utf8 bytes) or
-// multiple Unicode characters representing the NFKC expansion of a ligature
-// such as fi, ffl etc. These are also stored as utf8.
-class TESS_API UNICHAR {
-public:
-  UNICHAR() {
-    memset(chars, 0, UNICHAR_LEN);
-  }
-
-  // Construct from a utf8 string. If len<0 then the string is null terminated.
-  // If the string is too long to fit in the UNICHAR then it takes only what
-  // will fit.
-  UNICHAR(const char *utf8_str, int len);
-
-  // Construct from a single UCS4 character.
-  explicit UNICHAR(int unicode);
-
-  // Default copy constructor and operator= are OK.
-
-  // Get the first character as UCS-4.
-  int first_uni() const;
-
-  // Get the length of the UTF8 string.
-  int utf8_len() const {
-    int len = chars[UNICHAR_LEN - 1];
-    return len >= 0 && len < UNICHAR_LEN ? len : UNICHAR_LEN;
-  }
-
-  // Get a UTF8 string, but NOT nullptr terminated.
-  const char *utf8() const {
-    return chars;
-  }
-
-  // Get a terminated UTF8 string: Must delete[] it after use.
-  char *utf8_str() const;
-
-  // Get the number of bytes in the first character of the given utf8 string.
-  static int utf8_step(const char *utf8_str);
-
-  // A class to simplify iterating over and accessing elements of a UTF8
-  // string. Note that unlike the UNICHAR class, const_iterator does NOT COPY or
-  // take ownership of the underlying byte array. It also does not permit
-  // modification of the array (as the name suggests).
-  //
-  // Example:
-  //   for (UNICHAR::const_iterator it = UNICHAR::begin(str, str_len);
-  //        it != UNICHAR::end(str, len);
-  //        ++it) {
-  //     printf("UCS-4 symbol code = %d\n", *it);
-  //     char buf[5];
-  //     int char_len = it.get_utf8(buf); buf[char_len] = '\0';
-  //     printf("Char = %s\n", buf);
-  //   }
-  class TESS_API const_iterator {
-    using CI = const_iterator;
-
-  public:
-    // Step to the next UTF8 character.
-    // If the current position is at an illegal UTF8 character, then print an
-    // error message and step by one byte. If the current position is at a
-    // nullptr value, don't step past it.
-    const_iterator &operator++();
-
-    // Return the UCS-4 value at the current position.
-    // If the current position is at an illegal UTF8 value, return a single
-    // space character.
-    int operator*() const;
-
-    // Store the UTF-8 encoding of the current codepoint into buf, which must be
-    // at least 4 bytes long. Return the number of bytes written.
-    // If the current position is at an illegal UTF8 value, writes a single
-    // space character and returns 1.
-    // Note that this method does not null-terminate the buffer.
-    int get_utf8(char *buf) const;
-    // Returns the number of bytes of the current codepoint. Returns 1 if the
-    // current position is at an illegal UTF8 value.
-    int utf8_len() const;
-    // Returns true if the UTF-8 encoding at the current position is legal.
-    bool is_legal() const;
-
-    // Return the pointer into the string at the current position.
-    const char *utf8_data() const {
-      return it_;
-    }
-
-    // Iterator equality operators.
-    friend bool operator==(const CI &lhs, const CI &rhs) {
-      return lhs.it_ == rhs.it_;
-    }
-    friend bool operator!=(const CI &lhs, const CI &rhs) {
-      return !(lhs == rhs);
-    }
-
-  private:
-    friend class UNICHAR;
-    explicit const_iterator(const char *it) : it_(it) {}
-
-    const char *it_; // Pointer into the string.
-  };
-
-  // Create a start/end iterator pointing to a string. Note that these methods
-  // are static and do NOT create a copy or take ownership of the underlying
-  // array.
-  static const_iterator begin(const char *utf8_str, int byte_length);
-  static const_iterator end(const char *utf8_str, int byte_length);
-
-  // Converts a utf-8 string to a vector of unicodes.
-  // Returns an empty vector if the input contains invalid UTF-8.
-  static std::vector<char32> UTF8ToUTF32(const char *utf8_str);
-  // Converts a vector of unicodes to a utf8 string.
-  // Returns an empty string if the input contains an invalid unicode.
-  static std::string UTF32ToUTF8(const std::vector<char32> &str32);
-
-private:
-  // A UTF-8 representation of 1 or more Unicode characters.
-  // The last element (chars[UNICHAR_LEN - 1]) is a length if
-  // its value < UNICHAR_LEN, otherwise it is a genuine character.
-  char chars[UNICHAR_LEN]{};
-};
-
-} // namespace tesseract
-
-#endif // TESSERACT_CCUTIL_UNICHAR_H_
--- a/third_party/ocr/tesseract-ocr/kylin/aarch64/include/tesseract/version.h
+++ b/third_party/ocr/tesseract-ocr/kylin/aarch64/include/tesseract/version.h
@ -1,34 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// File:        version.h
-// Description: Version information
-//
-// (C) Copyright 2018, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef TESSERACT_API_VERSION_H_
-#define TESSERACT_API_VERSION_H_
-
-// clang-format off
-
-#define TESSERACT_MAJOR_VERSION @GENERIC_MAJOR_VERSION@
-#define TESSERACT_MINOR_VERSION @GENERIC_MINOR_VERSION@
-#define TESSERACT_MICRO_VERSION @GENERIC_MICRO_VERSION@
-
-#define TESSERACT_VERSION          \
-  (TESSERACT_MAJOR_VERSION << 16 | \
-   TESSERACT_MINOR_VERSION <<  8 | \
-   TESSERACT_MICRO_VERSION)
-
-#define TESSERACT_VERSION_STR "@PACKAGE_VERSION@"
-
-// clang-format on
-
-#endif // TESSERACT_API_VERSION_H_
--- a/third_party/ocr/tesseract-ocr/kylin/amd64/include/tesseract/baseapi.h
+++ b/third_party/ocr/tesseract-ocr/kylin/amd64/include/tesseract/baseapi.h
@ -1,812 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// File:        baseapi.h
-// Description: Simple API for calling tesseract.
-// Author:      Ray Smith
-//
-// (C) Copyright 2006, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef TESSERACT_API_BASEAPI_H_
-#define TESSERACT_API_BASEAPI_H_
-
-#ifdef HAVE_CONFIG_H
-#  include "config_auto.h" // DISABLED_LEGACY_ENGINE
-#endif
-
-#include "export.h"
-#include "pageiterator.h"
-#include "publictypes.h"
-#include "resultiterator.h"
-#include "unichar.h"
-
-#include "version.h"
-
-#include <cstdio>
-#include <vector> // for std::vector
-
-struct Pix;
-struct Pixa;
-struct Boxa;
-
-namespace tesseract {
-
-class PAGE_RES;
-class ParagraphModel;
-class BLOCK_LIST;
-class ETEXT_DESC;
-struct OSResults;
-class UNICHARSET;
-
-class Dawg;
-class Dict;
-class EquationDetect;
-class PageIterator;
-class ImageThresholder;
-class LTRResultIterator;
-class ResultIterator;
-class MutableIterator;
-class TessResultRenderer;
-class Tesseract;
-
-// Function to read a std::vector<char> from a whole file.
-// Returns false on failure.
-using FileReader = bool (*)(const char *filename, std::vector<char> *data);
-
-using DictFunc = int (Dict::*)(void *, const UNICHARSET &, UNICHAR_ID,
-                               bool) const;
-using ProbabilityInContextFunc = double (Dict::*)(const char *, const char *,
-                                                  int, const char *, int);
-
-/**
- * Base class for all tesseract APIs.
- * Specific classes can add ability to work on different inputs or produce
- * different outputs.
- * This class is mostly an interface layer on top of the Tesseract instance
- * class to hide the data types so that users of this class don't have to
- * include any other Tesseract headers.
- */
-class TESS_API TessBaseAPI {
-public:
-  TessBaseAPI();
-  virtual ~TessBaseAPI();
-  // Copy constructor and assignment operator are currently unsupported.
-  TessBaseAPI(TessBaseAPI const &) = delete;
-  TessBaseAPI &operator=(TessBaseAPI const &) = delete;
-
-  /**
-   * Returns the version identifier as a static string. Do not delete.
-   */
-  static const char *Version();
-
-  /**
-   * If compiled with OpenCL AND an available OpenCL
-   * device is deemed faster than serial code, then
-   * "device" is populated with the cl_device_id
-   * and returns sizeof(cl_device_id)
-   * otherwise *device=nullptr and returns 0.
-   */
-  static size_t getOpenCLDevice(void **device);
-
-  /**
-   * Set the name of the input file. Needed for training and
-   * reading a UNLV zone file, and for searchable PDF output.
-   */
-  void SetInputName(const char *name);
-  /**
-   * These functions are required for searchable PDF output.
-   * We need our hands on the input file so that we can include
-   * it in the PDF without transcoding. If that is not possible,
-   * we need the original image. Finally, resolution metadata
-   * is stored in the PDF so we need that as well.
-   */
-  const char *GetInputName();
-  // Takes ownership of the input pix.
-  void SetInputImage(Pix *pix);
-  Pix *GetInputImage();
-  int GetSourceYResolution();
-  const char *GetDatapath();
-
-  /** Set the name of the bonus output files. Needed only for debugging. */
-  void SetOutputName(const char *name);
-
-  /**
-   * Set the value of an internal "parameter."
-   * Supply the name of the parameter and the value as a string, just as
-   * you would in a config file.
-   * Returns false if the name lookup failed.
-   * Eg SetVariable("tessedit_char_blacklist", "xyz"); to ignore x, y and z.
-   * Or SetVariable("classify_bln_numeric_mode", "1"); to set numeric-only mode.
-   * SetVariable may be used before Init, but settings will revert to
-   * defaults on End().
-   *
-   * Note: Must be called after Init(). Only works for non-init variables
-   * (init variables should be passed to Init()).
-   */
-  bool SetVariable(const char *name, const char *value);
-  bool SetDebugVariable(const char *name, const char *value);
-
-  /**
-   * Returns true if the parameter was found among Tesseract parameters.
-   * Fills in value with the value of the parameter.
-   */
-  bool GetIntVariable(const char *name, int *value) const;
-  bool GetBoolVariable(const char *name, bool *value) const;
-  bool GetDoubleVariable(const char *name, double *value) const;
-
-  /**
-   * Returns the pointer to the string that represents the value of the
-   * parameter if it was found among Tesseract parameters.
-   */
-  const char *GetStringVariable(const char *name) const;
-
-#ifndef DISABLED_LEGACY_ENGINE
-
-  /**
-   * Print Tesseract fonts table to the given file.
-   */
-  void PrintFontsTable(FILE *fp) const;
-
-#endif
-
-  /**
-   * Print Tesseract parameters to the given file.
-   */
-  void PrintVariables(FILE *fp) const;
-
-  /**
-   * Get value of named variable as a string, if it exists.
-   */
-  bool GetVariableAsString(const char *name, std::string *val) const;
-
-  /**
-   * Instances are now mostly thread-safe and totally independent,
-   * but some global parameters remain. Basically it is safe to use multiple
-   * TessBaseAPIs in different threads in parallel, UNLESS:
-   * you use SetVariable on some of the Params in classify and textord.
-   * If you do, then the effect will be to change it for all your instances.
-   *
-   * Start tesseract. Returns zero on success and -1 on failure.
-   * NOTE that the only members that may be called before Init are those
-   * listed above here in the class definition.
-   *
-   * The datapath must be the name of the tessdata directory.
-   * The language is (usually) an ISO 639-3 string or nullptr will default to
-   * eng. It is entirely safe (and eventually will be efficient too) to call
-   * Init multiple times on the same instance to change language, or just
-   * to reset the classifier.
-   * The language may be a string of the form [~]<lang>[+[~]<lang>]* indicating
-   * that multiple languages are to be loaded. Eg hin+eng will load Hindi and
-   * English. Languages may specify internally that they want to be loaded
-   * with one or more other languages, so the ~ sign is available to override
-   * that. Eg if hin were set to load eng by default, then hin+~eng would force
-   * loading only hin. The number of loaded languages is limited only by
-   * memory, with the caveat that loading additional languages will impact
-   * both speed and accuracy, as there is more work to do to decide on the
-   * applicable language, and there is more chance of hallucinating incorrect
-   * words.
-   * WARNING: On changing languages, all Tesseract parameters are reset
-   * back to their default values. (Which may vary between languages.)
-   * If you have a rare need to set a Variable that controls
-   * initialization for a second call to Init you should explicitly
-   * call End() and then use SetVariable before Init. This is only a very
-   * rare use case, since there are very few uses that require any parameters
-   * to be set before Init.
-   *
-   * If set_only_non_debug_params is true, only params that do not contain
-   * "debug" in the name will be set.
-   */
-  int Init(const char *datapath, const char *language, OcrEngineMode mode,
-           char **configs, int configs_size,
-           const std::vector<std::string> *vars_vec,
-           const std::vector<std::string> *vars_values,
-           bool set_only_non_debug_params);
-  int Init(const char *datapath, const char *language, OcrEngineMode oem) {
-    return Init(datapath, language, oem, nullptr, 0, nullptr, nullptr, false);
-  }
-  int Init(const char *datapath, const char *language) {
-    return Init(datapath, language, OEM_DEFAULT, nullptr, 0, nullptr, nullptr,
-                false);
-  }
-  // In-memory version reads the traineddata file directly from the given
-  // data[data_size] array, and/or reads data via a FileReader.
-  int Init(const char *data, int data_size, const char *language,
-           OcrEngineMode mode, char **configs, int configs_size,
-           const std::vector<std::string> *vars_vec,
-           const std::vector<std::string> *vars_values,
-           bool set_only_non_debug_params, FileReader reader);
-
-  /**
-   * Returns the languages string used in the last valid initialization.
-   * If the last initialization specified "deu+hin" then that will be
-   * returned. If hin loaded eng automatically as well, then that will
-   * not be included in this list. To find the languages actually
-   * loaded use GetLoadedLanguagesAsVector.
-   * The returned string should NOT be deleted.
-   */
-  const char *GetInitLanguagesAsString() const;
-
-  /**
-   * Returns the loaded languages in the vector of std::string.
-   * Includes all languages loaded by the last Init, including those loaded
-   * as dependencies of other loaded languages.
-   */
-  void GetLoadedLanguagesAsVector(std::vector<std::string> *langs) const;
-
-  /**
-   * Returns the available languages in the sorted vector of std::string.
-   */
-  void GetAvailableLanguagesAsVector(std::vector<std::string> *langs) const;
-
-  /**
-   * Init only for page layout analysis. Use only for calls to SetImage and
-   * AnalysePage. Calls that attempt recognition will generate an error.
-   */
-  void InitForAnalysePage();
-
-  /**
-   * Read a "config" file containing a set of param, value pairs.
-   * Searches the standard places: tessdata/configs, tessdata/tessconfigs
-   * and also accepts a relative or absolute path name.
-   * Note: only non-init params will be set (init params are set by Init()).
-   */
-  void ReadConfigFile(const char *filename);
-  /** Same as above, but only set debug params from the given config file. */
-  void ReadDebugConfigFile(const char *filename);
-
-  /**
-   * Set the current page segmentation mode. Defaults to PSM_SINGLE_BLOCK.
-   * The mode is stored as an IntParam so it can also be modified by
-   * ReadConfigFile or SetVariable("tessedit_pageseg_mode", mode as string).
-   */
-  void SetPageSegMode(PageSegMode mode);
-
-  /** Return the current page segmentation mode. */
-  PageSegMode GetPageSegMode() const;
-
-  /**
-   * Recognize a rectangle from an image and return the result as a string.
-   * May be called many times for a single Init.
-   * Currently has no error checking.
-   * Greyscale of 8 and color of 24 or 32 bits per pixel may be given.
-   * Palette color images will not work properly and must be converted to
-   * 24 bit.
-   * Binary images of 1 bit per pixel may also be given but they must be
-   * byte packed with the MSB of the first byte being the first pixel, and a
-   * 1 represents WHITE. For binary images set bytes_per_pixel=0.
-   * The recognized text is returned as a char* which is coded
-   * as UTF8 and must be freed with the delete [] operator.
-   *
-   * Note that TesseractRect is the simplified convenience interface.
-   * For advanced uses, use SetImage, (optionally) SetRectangle, Recognize,
-   * and one or more of the Get*Text functions below.
-   */
-  char *TesseractRect(const unsigned char *imagedata, int bytes_per_pixel,
-                      int bytes_per_line, int left, int top, int width,
-                      int height);
-
-  /**
-   * Call between pages or documents etc to free up memory and forget
-   * adaptive data.
-   */
-  void ClearAdaptiveClassifier();
-
-  /**
-   * @defgroup AdvancedAPI Advanced API
-   * The following methods break TesseractRect into pieces, so you can
-   * get hold of the thresholded image, get the text in different formats,
-   * get bounding boxes, confidences etc.
-   */
-  /* @{ */
-
-  /**
-   * Provide an image for Tesseract to recognize. Format is as
-   * TesseractRect above. Copies the image buffer and converts to Pix.
-   * SetImage clears all recognition results, and sets the rectangle to the
-   * full image, so it may be followed immediately by a GetUTF8Text, and it
-   * will automatically perform recognition.
-   */
-  void SetImage(const unsigned char *imagedata, int width, int height,
-                int bytes_per_pixel, int bytes_per_line);
-
-  /**
-   * Provide an image for Tesseract to recognize. As with SetImage above,
-   * Tesseract takes its own copy of the image, so it need not persist until
-   * after Recognize.
-   * Pix vs raw, which to use?
-   * Use Pix where possible. Tesseract uses Pix as its internal representation
-   * and it is therefore more efficient to provide a Pix directly.
-   */
-  void SetImage(Pix *pix);
-
-  /**
-   * Set the resolution of the source image in pixels per inch so font size
-   * information can be calculated in results.  Call this after SetImage().
-   */
-  void SetSourceResolution(int ppi);
-
-  /**
-   * Restrict recognition to a sub-rectangle of the image. Call after SetImage.
-   * Each SetRectangle clears the recogntion results so multiple rectangles
-   * can be recognized with the same image.
-   */
-  void SetRectangle(int left, int top, int width, int height);
-
-  /**
-   * Get a copy of the internal thresholded image from Tesseract.
-   * Caller takes ownership of the Pix and must pixDestroy it.
-   * May be called any time after SetImage, or after TesseractRect.
-   */
-  Pix *GetThresholdedImage();
-
-  /**
-   * Get the result of page layout analysis as a leptonica-style
-   * Boxa, Pixa pair, in reading order.
-   * Can be called before or after Recognize.
-   */
-  Boxa *GetRegions(Pixa **pixa);
-
-  /**
-   * Get the textlines as a leptonica-style
-   * Boxa, Pixa pair, in reading order.
-   * Can be called before or after Recognize.
-   * If raw_image is true, then extract from the original image instead of the
-   * thresholded image and pad by raw_padding pixels.
-   * If blockids is not nullptr, the block-id of each line is also returned as
-   * an array of one element per line. delete [] after use. If paraids is not
-   * nullptr, the paragraph-id of each line within its block is also returned as
-   * an array of one element per line. delete [] after use.
-   */
-  Boxa *GetTextlines(bool raw_image, int raw_padding, Pixa **pixa,
-                     int **blockids, int **paraids);
-  /*
-   Helper method to extract from the thresholded image. (most common usage)
-*/
-  Boxa *GetTextlines(Pixa **pixa, int **blockids) {
-    return GetTextlines(false, 0, pixa, blockids, nullptr);
-  }
-
-  /**
-   * Get textlines and strips of image regions as a leptonica-style Boxa, Pixa
-   * pair, in reading order. Enables downstream handling of non-rectangular
-   * regions.
-   * Can be called before or after Recognize.
-   * If blockids is not nullptr, the block-id of each line is also returned as
-   * an array of one element per line. delete [] after use.
-   */
-  Boxa *GetStrips(Pixa **pixa, int **blockids);
-
-  /**
-   * Get the words as a leptonica-style
-   * Boxa, Pixa pair, in reading order.
-   * Can be called before or after Recognize.
-   */
-  Boxa *GetWords(Pixa **pixa);
-
-  /**
-   * Gets the individual connected (text) components (created
-   * after pages segmentation step, but before recognition)
-   * as a leptonica-style Boxa, Pixa pair, in reading order.
-   * Can be called before or after Recognize.
-   * Note: the caller is responsible for calling boxaDestroy()
-   * on the returned Boxa array and pixaDestroy() on cc array.
-   */
-  Boxa *GetConnectedComponents(Pixa **cc);
-
-  /**
-   * Get the given level kind of components (block, textline, word etc.) as a
-   * leptonica-style Boxa, Pixa pair, in reading order.
-   * Can be called before or after Recognize.
-   * If blockids is not nullptr, the block-id of each component is also returned
-   * as an array of one element per component. delete [] after use.
-   * If blockids is not nullptr, the paragraph-id of each component with its
-   * block is also returned as an array of one element per component. delete []
-   * after use. If raw_image is true, then portions of the original image are
-   * extracted instead of the thresholded image and padded with raw_padding. If
-   * text_only is true, then only text components are returned.
-   */
-  Boxa *GetComponentImages(PageIteratorLevel level, bool text_only,
-                           bool raw_image, int raw_padding, Pixa **pixa,
-                           int **blockids, int **paraids);
-  // Helper function to get binary images with no padding (most common usage).
-  Boxa *GetComponentImages(const PageIteratorLevel level, const bool text_only,
-                           Pixa **pixa, int **blockids) {
-    return GetComponentImages(level, text_only, false, 0, pixa, blockids,
-                              nullptr);
-  }
-
-  /**
-   * Returns the scale factor of the thresholded image that would be returned by
-   * GetThresholdedImage() and the various GetX() methods that call
-   * GetComponentImages().
-   * Returns 0 if no thresholder has been set.
-   */
-  int GetThresholdedImageScaleFactor() const;
-
-  /**
-   * Runs page layout analysis in the mode set by SetPageSegMode.
-   * May optionally be called prior to Recognize to get access to just
-   * the page layout results. Returns an iterator to the results.
-   * If merge_similar_words is true, words are combined where suitable for use
-   * with a line recognizer. Use if you want to use AnalyseLayout to find the
-   * textlines, and then want to process textline fragments with an external
-   * line recognizer.
-   * Returns nullptr on error or an empty page.
-   * The returned iterator must be deleted after use.
-   * WARNING! This class points to data held within the TessBaseAPI class, and
-   * therefore can only be used while the TessBaseAPI class still exists and
-   * has not been subjected to a call of Init, SetImage, Recognize, Clear, End
-   * DetectOS, or anything else that changes the internal PAGE_RES.
-   */
-  PageIterator *AnalyseLayout();
-  PageIterator *AnalyseLayout(bool merge_similar_words);
-
-  /**
-   * Recognize the image from SetAndThresholdImage, generating Tesseract
-   * internal structures. Returns 0 on success.
-   * Optional. The Get*Text functions below will call Recognize if needed.
-   * After Recognize, the output is kept internally until the next SetImage.
-   */
-  int Recognize(ETEXT_DESC *monitor);
-
-  /**
-   * Methods to retrieve information after SetAndThresholdImage(),
-   * Recognize() or TesseractRect(). (Recognize is called implicitly if needed.)
-   */
-
-  /**
-   * Turns images into symbolic text.
-   *
-   * filename can point to a single image, a multi-page TIFF,
-   * or a plain text list of image filenames.
-   *
-   * retry_config is useful for debugging. If not nullptr, you can fall
-   * back to an alternate configuration if a page fails for some
-   * reason.
-   *
-   * timeout_millisec terminates processing if any single page
-   * takes too long. Set to 0 for unlimited time.
-   *
-   * renderer is responible for creating the output. For example,
-   * use the TessTextRenderer if you want plaintext output, or
-   * the TessPDFRender to produce searchable PDF.
-   *
-   * If tessedit_page_number is non-negative, will only process that
-   * single page. Works for multi-page tiff file, or filelist.
-   *
-   * Returns true if successful, false on error.
-   */
-  bool ProcessPages(const char *filename, const char *retry_config,
-                    int timeout_millisec, TessResultRenderer *renderer);
-  // Does the real work of ProcessPages.
-  bool ProcessPagesInternal(const char *filename, const char *retry_config,
-                            int timeout_millisec, TessResultRenderer *renderer);
-
-  /**
-   * Turn a single image into symbolic text.
-   *
-   * The pix is the image processed. filename and page_index are
-   * metadata used by side-effect processes, such as reading a box
-   * file or formatting as hOCR.
-   *
-   * See ProcessPages for descriptions of other parameters.
-   */
-  bool ProcessPage(Pix *pix, int page_index, const char *filename,
-                   const char *retry_config, int timeout_millisec,
-                   TessResultRenderer *renderer);
-
-  /**
-   * Get a reading-order iterator to the results of LayoutAnalysis and/or
-   * Recognize. The returned iterator must be deleted after use.
-   * WARNING! This class points to data held within the TessBaseAPI class, and
-   * therefore can only be used while the TessBaseAPI class still exists and
-   * has not been subjected to a call of Init, SetImage, Recognize, Clear, End
-   * DetectOS, or anything else that changes the internal PAGE_RES.
-   */
-  ResultIterator *GetIterator();
-
-  /**
-   * Get a mutable iterator to the results of LayoutAnalysis and/or Recognize.
-   * The returned iterator must be deleted after use.
-   * WARNING! This class points to data held within the TessBaseAPI class, and
-   * therefore can only be used while the TessBaseAPI class still exists and
-   * has not been subjected to a call of Init, SetImage, Recognize, Clear, End
-   * DetectOS, or anything else that changes the internal PAGE_RES.
-   */
-  MutableIterator *GetMutableIterator();
-
-  /**
-   * The recognized text is returned as a char* which is coded
-   * as UTF8 and must be freed with the delete [] operator.
-   */
-  char *GetUTF8Text();
-
-  /**
-   * Make a HTML-formatted string with hOCR markup from the internal
-   * data structures.
-   * page_number is 0-based but will appear in the output as 1-based.
-   * monitor can be used to
-   *  cancel the recognition
-   *  receive progress callbacks
-   * Returned string must be freed with the delete [] operator.
-   */
-  char *GetHOCRText(ETEXT_DESC *monitor, int page_number);
-
-  /**
-   * Make a HTML-formatted string with hOCR markup from the internal
-   * data structures.
-   * page_number is 0-based but will appear in the output as 1-based.
-   * Returned string must be freed with the delete [] operator.
-   */
-  char *GetHOCRText(int page_number);
-
-  /**
-   * Make an XML-formatted string with Alto markup from the internal
-   * data structures.
-   */
-  char *GetAltoText(ETEXT_DESC *monitor, int page_number);
-
-  /**
-   * Make an XML-formatted string with Alto markup from the internal
-   * data structures.
-   */
-  char *GetAltoText(int page_number);
-
-  /**
-   * Make a TSV-formatted string from the internal data structures.
-   * page_number is 0-based but will appear in the output as 1-based.
-   * Returned string must be freed with the delete [] operator.
-   */
-  char *GetTSVText(int page_number);
-
-  /**
-   * Make a box file for LSTM training from the internal data structures.
-   * Constructs coordinates in the original image - not just the rectangle.
-   * page_number is a 0-based page index that will appear in the box file.
-   * Returned string must be freed with the delete [] operator.
-   */
-  char *GetLSTMBoxText(int page_number);
-
-  /**
-   * The recognized text is returned as a char* which is coded in the same
-   * format as a box file used in training.
-   * Constructs coordinates in the original image - not just the rectangle.
-   * page_number is a 0-based page index that will appear in the box file.
-   * Returned string must be freed with the delete [] operator.
-   */
-  char *GetBoxText(int page_number);
-
-  /**
-   * The recognized text is returned as a char* which is coded in the same
-   * format as a WordStr box file used in training.
-   * page_number is a 0-based page index that will appear in the box file.
-   * Returned string must be freed with the delete [] operator.
-   */
-  char *GetWordStrBoxText(int page_number);
-
-  /**
-   * The recognized text is returned as a char* which is coded
-   * as UNLV format Latin-1 with specific reject and suspect codes.
-   * Returned string must be freed with the delete [] operator.
-   */
-  char *GetUNLVText();
-
-  /**
-   * Detect the orientation of the input image and apparent script (alphabet).
-   * orient_deg is the detected clockwise rotation of the input image in degrees
-   * (0, 90, 180, 270)
-   * orient_conf is the confidence (15.0 is reasonably confident)
-   * script_name is an ASCII string, the name of the script, e.g. "Latin"
-   * script_conf is confidence level in the script
-   * Returns true on success and writes values to each parameter as an output
-   */
-  bool DetectOrientationScript(int *orient_deg, float *orient_conf,
-                               const char **script_name, float *script_conf);
-
-  /**
-   * The recognized text is returned as a char* which is coded
-   * as UTF8 and must be freed with the delete [] operator.
-   * page_number is a 0-based page index that will appear in the osd file.
-   */
-  char *GetOsdText(int page_number);
-
-  /** Returns the (average) confidence value between 0 and 100. */
-  int MeanTextConf();
-  /**
-   * Returns all word confidences (between 0 and 100) in an array, terminated
-   * by -1.  The calling function must delete [] after use.
-   * The number of confidences should correspond to the number of space-
-   * delimited words in GetUTF8Text.
-   */
-  int *AllWordConfidences();
-
-#ifndef DISABLED_LEGACY_ENGINE
-  /**
-   * Applies the given word to the adaptive classifier if possible.
-   * The word must be SPACE-DELIMITED UTF-8 - l i k e t h i s , so it can
-   * tell the boundaries of the graphemes.
-   * Assumes that SetImage/SetRectangle have been used to set the image
-   * to the given word. The mode arg should be PSM_SINGLE_WORD or
-   * PSM_CIRCLE_WORD, as that will be used to control layout analysis.
-   * The currently set PageSegMode is preserved.
-   * Returns false if adaption was not possible for some reason.
-   */
-  bool AdaptToWordStr(PageSegMode mode, const char *wordstr);
-#endif //  ndef DISABLED_LEGACY_ENGINE
-
-  /**
-   * Free up recognition results and any stored image data, without actually
-   * freeing any recognition data that would be time-consuming to reload.
-   * Afterwards, you must call SetImage or TesseractRect before doing
-   * any Recognize or Get* operation.
-   */
-  void Clear();
-
-  /**
-   * Close down tesseract and free up all memory. End() is equivalent to
-   * destructing and reconstructing your TessBaseAPI.
-   * Once End() has been used, none of the other API functions may be used
-   * other than Init and anything declared above it in the class definition.
-   */
-  void End();
-
-  /**
-   * Clear any library-level memory caches.
-   * There are a variety of expensive-to-load constant data structures (mostly
-   * language dictionaries) that are cached globally -- surviving the Init()
-   * and End() of individual TessBaseAPI's.  This function allows the clearing
-   * of these caches.
-   **/
-  static void ClearPersistentCache();
-
-  /**
-   * Check whether a word is valid according to Tesseract's language model
-   * @return 0 if the word is invalid, non-zero if valid.
-   * @warning temporary! This function will be removed from here and placed
-   * in a separate API at some future time.
-   */
-  int IsValidWord(const char *word) const;
-  // Returns true if utf8_character is defined in the UniCharset.
-  bool IsValidCharacter(const char *utf8_character) const;
-
-  bool GetTextDirection(int *out_offset, float *out_slope);
-
-  /** Sets Dict::letter_is_okay_ function to point to the given function. */
-  void SetDictFunc(DictFunc f);
-
-  /** Sets Dict::probability_in_context_ function to point to the given
-   * function.
-   */
-  void SetProbabilityInContextFunc(ProbabilityInContextFunc f);
-
-  /**
-   * Estimates the Orientation And Script of the image.
-   * @return true if the image was processed successfully.
-   */
-  bool DetectOS(OSResults *);
-
-  /**
-   * Return text orientation of each block as determined by an earlier run
-   * of layout analysis.
-   */
-  void GetBlockTextOrientations(int **block_orientation,
-                                bool **vertical_writing);
-
-  /** This method returns the string form of the specified unichar. */
-  const char *GetUnichar(int unichar_id) const;
-
-  /** Return the pointer to the i-th dawg loaded into tesseract_ object. */
-  const Dawg *GetDawg(int i) const;
-
-  /** Return the number of dawgs loaded into tesseract_ object. */
-  int NumDawgs() const;
-
-  Tesseract *tesseract() const {
-    return tesseract_;
-  }
-
-  OcrEngineMode oem() const {
-    return last_oem_requested_;
-  }
-
-  void set_min_orientation_margin(double margin);
-  /* @} */
-
-protected:
-  /** Common code for setting the image. Returns true if Init has been called.
-   */
-  bool InternalSetImage();
-
-  /**
-   * Run the thresholder to make the thresholded image. If pix is not nullptr,
-   * the source is thresholded to pix instead of the internal IMAGE.
-   */
-  virtual bool Threshold(Pix **pix);
-
-  /**
-   * Find lines from the image making the BLOCK_LIST.
-   * @return 0 on success.
-   */
-  int FindLines();
-
-  /** Delete the pageres and block list ready for a new page. */
-  void ClearResults();
-
-  /**
-   * Return an LTR Result Iterator -- used only for training, as we really want
-   * to ignore all BiDi smarts at that point.
-   * delete once you're done with it.
-   */
-  LTRResultIterator *GetLTRIterator();
-
-  /**
-   * Return the length of the output text string, as UTF8, assuming
-   * one newline per line and one per block, with a terminator,
-   * and assuming a single character reject marker for each rejected character.
-   * Also return the number of recognized blobs in blob_count.
-   */
-  int TextLength(int *blob_count) const;
-
-  //// paragraphs.cpp ////////////////////////////////////////////////////
-  void DetectParagraphs(bool after_text_recognition);
-
-  const PAGE_RES *GetPageRes() const {
-    return page_res_;
-  }
-
-protected:
-  Tesseract *tesseract_;          ///< The underlying data object.
-  Tesseract *osd_tesseract_;      ///< For orientation & script detection.
-  EquationDetect *equ_detect_;    ///< The equation detector.
-  FileReader reader_;             ///< Reads files from any filesystem.
-  ImageThresholder *thresholder_; ///< Image thresholding module.
-  std::vector<ParagraphModel *> *paragraph_models_;
-  BLOCK_LIST *block_list_;           ///< The page layout.
-  PAGE_RES *page_res_;               ///< The page-level data.
-  std::string input_file_;           ///< Name used by training code.
-  std::string output_file_;          ///< Name used by debug code.
-  std::string datapath_;             ///< Current location of tessdata.
-  std::string language_;             ///< Last initialized language.
-  OcrEngineMode last_oem_requested_; ///< Last ocr language mode requested.
-  bool recognition_done_;            ///< page_res_ contains recognition data.
-
-  /**
-   * @defgroup ThresholderParams Thresholder Parameters
-   * Parameters saved from the Thresholder. Needed to rebuild coordinates.
-   */
-  /* @{ */
-  int rect_left_;
-  int rect_top_;
-  int rect_width_;
-  int rect_height_;
-  int image_width_;
-  int image_height_;
-  /* @} */
-
-private:
-  // A list of image filenames gets special consideration
-  bool ProcessPagesFileList(FILE *fp, std::string *buf,
-                            const char *retry_config, int timeout_millisec,
-                            TessResultRenderer *renderer,
-                            int tessedit_page_number);
-  // TIFF supports multipage so gets special consideration.
-  bool ProcessPagesMultipageTiff(const unsigned char *data, size_t size,
-                                 const char *filename, const char *retry_config,
-                                 int timeout_millisec,
-                                 TessResultRenderer *renderer,
-                                 int tessedit_page_number);
-}; // class TessBaseAPI.
-
-/** Escape a char string - remove &<>"' with HTML codes. */
-std::string HOcrEscape(const char *text);
-
-} // namespace tesseract
-
-#endif // TESSERACT_API_BASEAPI_H_
--- a/third_party/ocr/tesseract-ocr/kylin/amd64/include/tesseract/capi.h
+++ b/third_party/ocr/tesseract-ocr/kylin/amd64/include/tesseract/capi.h
@ -1,484 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// File:        capi.h
-// Description: C-API TessBaseAPI
-//
-// (C) Copyright 2012, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef API_CAPI_H_
-#define API_CAPI_H_
-
-#include "export.h"
-
-#ifdef __cplusplus
-#  include <tesseract/baseapi.h>
-#  include <tesseract/ocrclass.h>
-#  include <tesseract/pageiterator.h>
-#  include <tesseract/renderer.h>
-#  include <tesseract/resultiterator.h>
-#endif
-
-#include <stdbool.h>
-#include <stdio.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#ifndef BOOL
-#  define BOOL int
-#  define TRUE 1
-#  define FALSE 0
-#endif
-
-#ifdef __cplusplus
-typedef tesseract::TessResultRenderer TessResultRenderer;
-typedef tesseract::TessBaseAPI TessBaseAPI;
-typedef tesseract::PageIterator TessPageIterator;
-typedef tesseract::ResultIterator TessResultIterator;
-typedef tesseract::MutableIterator TessMutableIterator;
-typedef tesseract::ChoiceIterator TessChoiceIterator;
-typedef tesseract::OcrEngineMode TessOcrEngineMode;
-typedef tesseract::PageSegMode TessPageSegMode;
-typedef tesseract::PageIteratorLevel TessPageIteratorLevel;
-typedef tesseract::Orientation TessOrientation;
-typedef tesseract::ParagraphJustification TessParagraphJustification;
-typedef tesseract::WritingDirection TessWritingDirection;
-typedef tesseract::TextlineOrder TessTextlineOrder;
-typedef tesseract::PolyBlockType TessPolyBlockType;
-typedef tesseract::ETEXT_DESC ETEXT_DESC;
-#else
-typedef struct TessResultRenderer TessResultRenderer;
-typedef struct TessBaseAPI TessBaseAPI;
-typedef struct TessPageIterator TessPageIterator;
-typedef struct TessResultIterator TessResultIterator;
-typedef struct TessMutableIterator TessMutableIterator;
-typedef struct TessChoiceIterator TessChoiceIterator;
-typedef enum TessOcrEngineMode {
-  OEM_TESSERACT_ONLY,
-  OEM_LSTM_ONLY,
-  OEM_TESSERACT_LSTM_COMBINED,
-  OEM_DEFAULT
-} TessOcrEngineMode;
-typedef enum TessPageSegMode {
-  PSM_OSD_ONLY,
-  PSM_AUTO_OSD,
-  PSM_AUTO_ONLY,
-  PSM_AUTO,
-  PSM_SINGLE_COLUMN,
-  PSM_SINGLE_BLOCK_VERT_TEXT,
-  PSM_SINGLE_BLOCK,
-  PSM_SINGLE_LINE,
-  PSM_SINGLE_WORD,
-  PSM_CIRCLE_WORD,
-  PSM_SINGLE_CHAR,
-  PSM_SPARSE_TEXT,
-  PSM_SPARSE_TEXT_OSD,
-  PSM_RAW_LINE,
-  PSM_COUNT
-} TessPageSegMode;
-typedef enum TessPageIteratorLevel {
-  RIL_BLOCK,
-  RIL_PARA,
-  RIL_TEXTLINE,
-  RIL_WORD,
-  RIL_SYMBOL
-} TessPageIteratorLevel;
-typedef enum TessPolyBlockType {
-  PT_UNKNOWN,
-  PT_FLOWING_TEXT,
-  PT_HEADING_TEXT,
-  PT_PULLOUT_TEXT,
-  PT_EQUATION,
-  PT_INLINE_EQUATION,
-  PT_TABLE,
-  PT_VERTICAL_TEXT,
-  PT_CAPTION_TEXT,
-  PT_FLOWING_IMAGE,
-  PT_HEADING_IMAGE,
-  PT_PULLOUT_IMAGE,
-  PT_HORZ_LINE,
-  PT_VERT_LINE,
-  PT_NOISE,
-  PT_COUNT
-} TessPolyBlockType;
-typedef enum TessOrientation {
-  ORIENTATION_PAGE_UP,
-  ORIENTATION_PAGE_RIGHT,
-  ORIENTATION_PAGE_DOWN,
-  ORIENTATION_PAGE_LEFT
-} TessOrientation;
-typedef enum TessParagraphJustification {
-  JUSTIFICATION_UNKNOWN,
-  JUSTIFICATION_LEFT,
-  JUSTIFICATION_CENTER,
-  JUSTIFICATION_RIGHT
-} TessParagraphJustification;
-typedef enum TessWritingDirection {
-  WRITING_DIRECTION_LEFT_TO_RIGHT,
-  WRITING_DIRECTION_RIGHT_TO_LEFT,
-  WRITING_DIRECTION_TOP_TO_BOTTOM
-} TessWritingDirection;
-typedef enum TessTextlineOrder {
-  TEXTLINE_ORDER_LEFT_TO_RIGHT,
-  TEXTLINE_ORDER_RIGHT_TO_LEFT,
-  TEXTLINE_ORDER_TOP_TO_BOTTOM
-} TessTextlineOrder;
-typedef struct ETEXT_DESC ETEXT_DESC;
-#endif
-
-typedef bool (*TessCancelFunc)(void *cancel_this, int words);
-typedef bool (*TessProgressFunc)(ETEXT_DESC *ths, int left, int right, int top,
-                                 int bottom);
-
-struct Pix;
-struct Boxa;
-struct Pixa;
-
-/* General free functions */
-
-TESS_API const char *TessVersion();
-TESS_API void TessDeleteText(const char *text);
-TESS_API void TessDeleteTextArray(char **arr);
-TESS_API void TessDeleteIntArray(const int *arr);
-
-/* Renderer API */
-TESS_API TessResultRenderer *TessTextRendererCreate(const char *outputbase);
-TESS_API TessResultRenderer *TessHOcrRendererCreate(const char *outputbase);
-TESS_API TessResultRenderer *TessHOcrRendererCreate2(const char *outputbase,
-                                                     BOOL font_info);
-TESS_API TessResultRenderer *TessAltoRendererCreate(const char *outputbase);
-TESS_API TessResultRenderer *TessTsvRendererCreate(const char *outputbase);
-TESS_API TessResultRenderer *TessPDFRendererCreate(const char *outputbase,
-                                                   const char *datadir,
-                                                   BOOL textonly);
-TESS_API TessResultRenderer *TessUnlvRendererCreate(const char *outputbase);
-TESS_API TessResultRenderer *TessBoxTextRendererCreate(const char *outputbase);
-TESS_API TessResultRenderer *TessLSTMBoxRendererCreate(const char *outputbase);
-TESS_API TessResultRenderer *TessWordStrBoxRendererCreate(
-    const char *outputbase);
-
-TESS_API void TessDeleteResultRenderer(TessResultRenderer *renderer);
-TESS_API void TessResultRendererInsert(TessResultRenderer *renderer,
-                                       TessResultRenderer *next);
-TESS_API TessResultRenderer *TessResultRendererNext(
-    TessResultRenderer *renderer);
-TESS_API BOOL TessResultRendererBeginDocument(TessResultRenderer *renderer,
-                                              const char *title);
-TESS_API BOOL TessResultRendererAddImage(TessResultRenderer *renderer,
-                                         TessBaseAPI *api);
-TESS_API BOOL TessResultRendererEndDocument(TessResultRenderer *renderer);
-
-TESS_API const char *TessResultRendererExtention(TessResultRenderer *renderer);
-TESS_API const char *TessResultRendererTitle(TessResultRenderer *renderer);
-TESS_API int TessResultRendererImageNum(TessResultRenderer *renderer);
-
-/* Base API */
-
-TESS_API TessBaseAPI *TessBaseAPICreate();
-TESS_API void TessBaseAPIDelete(TessBaseAPI *handle);
-
-TESS_API size_t TessBaseAPIGetOpenCLDevice(TessBaseAPI *handle, void **device);
-
-TESS_API void TessBaseAPISetInputName(TessBaseAPI *handle, const char *name);
-TESS_API const char *TessBaseAPIGetInputName(TessBaseAPI *handle);
-
-TESS_API void TessBaseAPISetInputImage(TessBaseAPI *handle, struct Pix *pix);
-TESS_API struct Pix *TessBaseAPIGetInputImage(TessBaseAPI *handle);
-
-TESS_API int TessBaseAPIGetSourceYResolution(TessBaseAPI *handle);
-TESS_API const char *TessBaseAPIGetDatapath(TessBaseAPI *handle);
-
-TESS_API void TessBaseAPISetOutputName(TessBaseAPI *handle, const char *name);
-
-TESS_API BOOL TessBaseAPISetVariable(TessBaseAPI *handle, const char *name,
-                                     const char *value);
-TESS_API BOOL TessBaseAPISetDebugVariable(TessBaseAPI *handle, const char *name,
-                                          const char *value);
-
-TESS_API BOOL TessBaseAPIGetIntVariable(const TessBaseAPI *handle,
-                                        const char *name, int *value);
-TESS_API BOOL TessBaseAPIGetBoolVariable(const TessBaseAPI *handle,
-                                         const char *name, BOOL *value);
-TESS_API BOOL TessBaseAPIGetDoubleVariable(const TessBaseAPI *handle,
-                                           const char *name, double *value);
-TESS_API const char *TessBaseAPIGetStringVariable(const TessBaseAPI *handle,
-                                                  const char *name);
-
-TESS_API void TessBaseAPIPrintVariables(const TessBaseAPI *handle, FILE *fp);
-TESS_API BOOL TessBaseAPIPrintVariablesToFile(const TessBaseAPI *handle,
-                                              const char *filename);
-
-TESS_API int TessBaseAPIInit1(TessBaseAPI *handle, const char *datapath,
-                              const char *language, TessOcrEngineMode oem,
-                              char **configs, int configs_size);
-TESS_API int TessBaseAPIInit2(TessBaseAPI *handle, const char *datapath,
-                              const char *language, TessOcrEngineMode oem);
-TESS_API int TessBaseAPIInit3(TessBaseAPI *handle, const char *datapath,
-                              const char *language);
-
-TESS_API int TessBaseAPIInit4(TessBaseAPI *handle, const char *datapath,
-                              const char *language, TessOcrEngineMode mode,
-                              char **configs, int configs_size, char **vars_vec,
-                              char **vars_values, size_t vars_vec_size,
-                              BOOL set_only_non_debug_params);
-
-TESS_API int TessBaseAPIInit5(TessBaseAPI *handle, const char *data, int data_size,
-                              const char *language, TessOcrEngineMode mode,
-                              char **configs, int configs_size, char **vars_vec,
-                              char **vars_values, size_t vars_vec_size,
-                              BOOL set_only_non_debug_params);
-
-TESS_API const char *TessBaseAPIGetInitLanguagesAsString(
-    const TessBaseAPI *handle);
-TESS_API char **TessBaseAPIGetLoadedLanguagesAsVector(
-    const TessBaseAPI *handle);
-TESS_API char **TessBaseAPIGetAvailableLanguagesAsVector(
-    const TessBaseAPI *handle);
-
-TESS_API void TessBaseAPIInitForAnalysePage(TessBaseAPI *handle);
-
-TESS_API void TessBaseAPIReadConfigFile(TessBaseAPI *handle,
-                                        const char *filename);
-TESS_API void TessBaseAPIReadDebugConfigFile(TessBaseAPI *handle,
-                                             const char *filename);
-
-TESS_API void TessBaseAPISetPageSegMode(TessBaseAPI *handle,
-                                        TessPageSegMode mode);
-TESS_API TessPageSegMode TessBaseAPIGetPageSegMode(const TessBaseAPI *handle);
-
-TESS_API char *TessBaseAPIRect(TessBaseAPI *handle,
-                               const unsigned char *imagedata,
-                               int bytes_per_pixel, int bytes_per_line,
-                               int left, int top, int width, int height);
-
-TESS_API void TessBaseAPIClearAdaptiveClassifier(TessBaseAPI *handle);
-
-TESS_API void TessBaseAPISetImage(TessBaseAPI *handle,
-                                  const unsigned char *imagedata, int width,
-                                  int height, int bytes_per_pixel,
-                                  int bytes_per_line);
-TESS_API void TessBaseAPISetImage2(TessBaseAPI *handle, struct Pix *pix);
-
-TESS_API void TessBaseAPISetSourceResolution(TessBaseAPI *handle, int ppi);
-
-TESS_API void TessBaseAPISetRectangle(TessBaseAPI *handle, int left, int top,
-                                      int width, int height);
-
-TESS_API struct Pix *TessBaseAPIGetThresholdedImage(TessBaseAPI *handle);
-TESS_API struct Boxa *TessBaseAPIGetRegions(TessBaseAPI *handle,
-                                            struct Pixa **pixa);
-TESS_API struct Boxa *TessBaseAPIGetTextlines(TessBaseAPI *handle,
-                                              struct Pixa **pixa,
-                                              int **blockids);
-TESS_API struct Boxa *TessBaseAPIGetTextlines1(TessBaseAPI *handle,
-                                               BOOL raw_image, int raw_padding,
-                                               struct Pixa **pixa,
-                                               int **blockids, int **paraids);
-TESS_API struct Boxa *TessBaseAPIGetStrips(TessBaseAPI *handle,
-                                           struct Pixa **pixa, int **blockids);
-TESS_API struct Boxa *TessBaseAPIGetWords(TessBaseAPI *handle,
-                                          struct Pixa **pixa);
-TESS_API struct Boxa *TessBaseAPIGetConnectedComponents(TessBaseAPI *handle,
-                                                        struct Pixa **cc);
-TESS_API struct Boxa *TessBaseAPIGetComponentImages(TessBaseAPI *handle,
-                                                    TessPageIteratorLevel level,
-                                                    BOOL text_only,
-                                                    struct Pixa **pixa,
-                                                    int **blockids);
-TESS_API struct Boxa *TessBaseAPIGetComponentImages1(
-    TessBaseAPI *handle, TessPageIteratorLevel level, BOOL text_only,
-    BOOL raw_image, int raw_padding, struct Pixa **pixa, int **blockids,
-    int **paraids);
-
-TESS_API int TessBaseAPIGetThresholdedImageScaleFactor(
-    const TessBaseAPI *handle);
-
-TESS_API TessPageIterator *TessBaseAPIAnalyseLayout(TessBaseAPI *handle);
-
-TESS_API int TessBaseAPIRecognize(TessBaseAPI *handle, ETEXT_DESC *monitor);
-
-TESS_API BOOL TessBaseAPIProcessPages(TessBaseAPI *handle, const char *filename,
-                                      const char *retry_config,
-                                      int timeout_millisec,
-                                      TessResultRenderer *renderer);
-TESS_API BOOL TessBaseAPIProcessPage(TessBaseAPI *handle, struct Pix *pix,
-                                     int page_index, const char *filename,
-                                     const char *retry_config,
-                                     int timeout_millisec,
-                                     TessResultRenderer *renderer);
-
-TESS_API TessResultIterator *TessBaseAPIGetIterator(TessBaseAPI *handle);
-TESS_API TessMutableIterator *TessBaseAPIGetMutableIterator(
-    TessBaseAPI *handle);
-
-TESS_API char *TessBaseAPIGetUTF8Text(TessBaseAPI *handle);
-TESS_API char *TessBaseAPIGetHOCRText(TessBaseAPI *handle, int page_number);
-
-TESS_API char *TessBaseAPIGetAltoText(TessBaseAPI *handle, int page_number);
-TESS_API char *TessBaseAPIGetTsvText(TessBaseAPI *handle, int page_number);
-
-TESS_API char *TessBaseAPIGetBoxText(TessBaseAPI *handle, int page_number);
-TESS_API char *TessBaseAPIGetLSTMBoxText(TessBaseAPI *handle, int page_number);
-TESS_API char *TessBaseAPIGetWordStrBoxText(TessBaseAPI *handle,
-                                            int page_number);
-
-TESS_API char *TessBaseAPIGetUNLVText(TessBaseAPI *handle);
-TESS_API int TessBaseAPIMeanTextConf(TessBaseAPI *handle);
-
-TESS_API int *TessBaseAPIAllWordConfidences(TessBaseAPI *handle);
-
-#ifndef DISABLED_LEGACY_ENGINE
-TESS_API BOOL TessBaseAPIAdaptToWordStr(TessBaseAPI *handle,
-                                        TessPageSegMode mode,
-                                        const char *wordstr);
-#endif // #ifndef DISABLED_LEGACY_ENGINE
-
-TESS_API void TessBaseAPIClear(TessBaseAPI *handle);
-TESS_API void TessBaseAPIEnd(TessBaseAPI *handle);
-
-TESS_API int TessBaseAPIIsValidWord(TessBaseAPI *handle, const char *word);
-TESS_API BOOL TessBaseAPIGetTextDirection(TessBaseAPI *handle, int *out_offset,
-                                          float *out_slope);
-
-TESS_API const char *TessBaseAPIGetUnichar(TessBaseAPI *handle, int unichar_id);
-
-TESS_API void TessBaseAPIClearPersistentCache(TessBaseAPI *handle);
-
-#ifndef DISABLED_LEGACY_ENGINE
-
-// Call TessDeleteText(*best_script_name) to free memory allocated by this
-// function
-TESS_API BOOL TessBaseAPIDetectOrientationScript(TessBaseAPI *handle,
-                                                 int *orient_deg,
-                                                 float *orient_conf,
-                                                 const char **script_name,
-                                                 float *script_conf);
-#endif // #ifndef DISABLED_LEGACY_ENGINE
-
-TESS_API void TessBaseAPISetMinOrientationMargin(TessBaseAPI *handle,
-                                                 double margin);
-
-TESS_API int TessBaseAPINumDawgs(const TessBaseAPI *handle);
-
-TESS_API TessOcrEngineMode TessBaseAPIOem(const TessBaseAPI *handle);
-
-TESS_API void TessBaseGetBlockTextOrientations(TessBaseAPI *handle,
-                                               int **block_orientation,
-                                               bool **vertical_writing);
-
-/* Page iterator */
-
-TESS_API void TessPageIteratorDelete(TessPageIterator *handle);
-
-TESS_API TessPageIterator *TessPageIteratorCopy(const TessPageIterator *handle);
-
-TESS_API void TessPageIteratorBegin(TessPageIterator *handle);
-
-TESS_API BOOL TessPageIteratorNext(TessPageIterator *handle,
-                                   TessPageIteratorLevel level);
-
-TESS_API BOOL TessPageIteratorIsAtBeginningOf(const TessPageIterator *handle,
-                                              TessPageIteratorLevel level);
-
-TESS_API BOOL TessPageIteratorIsAtFinalElement(const TessPageIterator *handle,
-                                               TessPageIteratorLevel level,
-                                               TessPageIteratorLevel element);
-
-TESS_API BOOL TessPageIteratorBoundingBox(const TessPageIterator *handle,
-                                          TessPageIteratorLevel level,
-                                          int *left, int *top, int *right,
-                                          int *bottom);
-
-TESS_API TessPolyBlockType
-TessPageIteratorBlockType(const TessPageIterator *handle);
-
-TESS_API struct Pix *TessPageIteratorGetBinaryImage(
-    const TessPageIterator *handle, TessPageIteratorLevel level);
-
-TESS_API struct Pix *TessPageIteratorGetImage(const TessPageIterator *handle,
-                                              TessPageIteratorLevel level,
-                                              int padding,
-                                              struct Pix *original_image,
-                                              int *left, int *top);
-
-TESS_API BOOL TessPageIteratorBaseline(const TessPageIterator *handle,
-                                       TessPageIteratorLevel level, int *x1,
-                                       int *y1, int *x2, int *y2);
-
-TESS_API void TessPageIteratorOrientation(
-    TessPageIterator *handle, TessOrientation *orientation,
-    TessWritingDirection *writing_direction, TessTextlineOrder *textline_order,
-    float *deskew_angle);
-
-TESS_API void TessPageIteratorParagraphInfo(
-    TessPageIterator *handle, TessParagraphJustification *justification,
-    BOOL *is_list_item, BOOL *is_crown, int *first_line_indent);
-
-/* Result iterator */
-
-TESS_API void TessResultIteratorDelete(TessResultIterator *handle);
-TESS_API TessResultIterator *TessResultIteratorCopy(
-    const TessResultIterator *handle);
-TESS_API TessPageIterator *TessResultIteratorGetPageIterator(
-    TessResultIterator *handle);
-TESS_API const TessPageIterator *TessResultIteratorGetPageIteratorConst(
-    const TessResultIterator *handle);
-TESS_API TessChoiceIterator *TessResultIteratorGetChoiceIterator(
-    const TessResultIterator *handle);
-
-TESS_API BOOL TessResultIteratorNext(TessResultIterator *handle,
-                                     TessPageIteratorLevel level);
-TESS_API char *TessResultIteratorGetUTF8Text(const TessResultIterator *handle,
-                                             TessPageIteratorLevel level);
-TESS_API float TessResultIteratorConfidence(const TessResultIterator *handle,
-                                            TessPageIteratorLevel level);
-TESS_API const char *TessResultIteratorWordRecognitionLanguage(
-    const TessResultIterator *handle);
-TESS_API const char *TessResultIteratorWordFontAttributes(
-    const TessResultIterator *handle, BOOL *is_bold, BOOL *is_italic,
-    BOOL *is_underlined, BOOL *is_monospace, BOOL *is_serif, BOOL *is_smallcaps,
-    int *pointsize, int *font_id);
-
-TESS_API BOOL
-TessResultIteratorWordIsFromDictionary(const TessResultIterator *handle);
-TESS_API BOOL TessResultIteratorWordIsNumeric(const TessResultIterator *handle);
-TESS_API BOOL
-TessResultIteratorSymbolIsSuperscript(const TessResultIterator *handle);
-TESS_API BOOL
-TessResultIteratorSymbolIsSubscript(const TessResultIterator *handle);
-TESS_API BOOL
-TessResultIteratorSymbolIsDropcap(const TessResultIterator *handle);
-
-TESS_API void TessChoiceIteratorDelete(TessChoiceIterator *handle);
-TESS_API BOOL TessChoiceIteratorNext(TessChoiceIterator *handle);
-TESS_API const char *TessChoiceIteratorGetUTF8Text(
-    const TessChoiceIterator *handle);
-TESS_API float TessChoiceIteratorConfidence(const TessChoiceIterator *handle);
-
-/* Progress monitor */
-
-TESS_API ETEXT_DESC *TessMonitorCreate();
-TESS_API void TessMonitorDelete(ETEXT_DESC *monitor);
-TESS_API void TessMonitorSetCancelFunc(ETEXT_DESC *monitor,
-                                       TessCancelFunc cancelFunc);
-TESS_API void TessMonitorSetCancelThis(ETEXT_DESC *monitor, void *cancelThis);
-TESS_API void *TessMonitorGetCancelThis(ETEXT_DESC *monitor);
-TESS_API void TessMonitorSetProgressFunc(ETEXT_DESC *monitor,
-                                         TessProgressFunc progressFunc);
-TESS_API int TessMonitorGetProgress(ETEXT_DESC *monitor);
-TESS_API void TessMonitorSetDeadlineMSecs(ETEXT_DESC *monitor, int deadline);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif // API_CAPI_H_
--- a/third_party/ocr/tesseract-ocr/kylin/amd64/include/tesseract/export.h
+++ b/third_party/ocr/tesseract-ocr/kylin/amd64/include/tesseract/export.h
@ -1,37 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// File:        export.h
-// Description: Place holder
-//
-// (C) Copyright 2006, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef TESSERACT_PLATFORM_H_
-#define TESSERACT_PLATFORM_H_
-
-#ifndef TESS_API
-#  if defined(_WIN32) || defined(__CYGWIN__)
-#    if defined(TESS_EXPORTS)
-#      define TESS_API __declspec(dllexport)
-#    elif defined(TESS_IMPORTS)
-#      define TESS_API __declspec(dllimport)
-#    else
-#      define TESS_API
-#    endif
-#  else
-#    if defined(TESS_EXPORTS) || defined(TESS_IMPORTS)
-#      define TESS_API __attribute__((visibility("default")))
-#    else
-#      define TESS_API
-#    endif
-#  endif
-#endif
-
-#endif // TESSERACT_PLATFORM_H_
--- a/third_party/ocr/tesseract-ocr/kylin/amd64/include/tesseract/ltrresultiterator.h
+++ b/third_party/ocr/tesseract-ocr/kylin/amd64/include/tesseract/ltrresultiterator.h
@ -1,235 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// File:        ltrresultiterator.h
-// Description: Iterator for tesseract results in strict left-to-right
-//              order that avoids using tesseract internal data structures.
-// Author:      Ray Smith
-//
-// (C) Copyright 2010, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H_
-#define TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H_
-
-#include "export.h"       // for TESS_API
-#include "pageiterator.h" // for PageIterator
-#include "publictypes.h"  // for PageIteratorLevel
-#include "unichar.h"      // for StrongScriptDirection
-
-namespace tesseract {
-
-class BLOB_CHOICE_IT;
-class PAGE_RES;
-class WERD_RES;
-
-class Tesseract;
-
-// Class to iterate over tesseract results, providing access to all levels
-// of the page hierarchy, without including any tesseract headers or having
-// to handle any tesseract structures.
-// WARNING! This class points to data held within the TessBaseAPI class, and
-// therefore can only be used while the TessBaseAPI class still exists and
-// has not been subjected to a call of Init, SetImage, Recognize, Clear, End
-// DetectOS, or anything else that changes the internal PAGE_RES.
-// See tesseract/publictypes.h for the definition of PageIteratorLevel.
-// See also base class PageIterator, which contains the bulk of the interface.
-// LTRResultIterator adds text-specific methods for access to OCR output.
-
-class TESS_API LTRResultIterator : public PageIterator {
-  friend class ChoiceIterator;
-
-public:
-  // page_res and tesseract come directly from the BaseAPI.
-  // The rectangle parameters are copied indirectly from the Thresholder,
-  // via the BaseAPI. They represent the coordinates of some rectangle in an
-  // original image (in top-left-origin coordinates) and therefore the top-left
-  // needs to be added to any output boxes in order to specify coordinates
-  // in the original image. See TessBaseAPI::SetRectangle.
-  // The scale and scaled_yres are in case the Thresholder scaled the image
-  // rectangle prior to thresholding. Any coordinates in tesseract's image
-  // must be divided by scale before adding (rect_left, rect_top).
-  // The scaled_yres indicates the effective resolution of the binary image
-  // that tesseract has been given by the Thresholder.
-  // After the constructor, Begin has already been called.
-  LTRResultIterator(PAGE_RES *page_res, Tesseract *tesseract, int scale,
-                    int scaled_yres, int rect_left, int rect_top,
-                    int rect_width, int rect_height);
-
-  ~LTRResultIterator() override;
-
-  // LTRResultIterators may be copied! This makes it possible to iterate over
-  // all the objects at a lower level, while maintaining an iterator to
-  // objects at a higher level. These constructors DO NOT CALL Begin, so
-  // iterations will continue from the location of src.
-  // TODO: For now the copy constructor and operator= only need the base class
-  // versions, but if new data members are added, don't forget to add them!
-
-  // ============= Moving around within the page ============.
-
-  // See PageIterator.
-
-  // ============= Accessing data ==============.
-
-  // Returns the null terminated UTF-8 encoded text string for the current
-  // object at the given level. Use delete [] to free after use.
-  char *GetUTF8Text(PageIteratorLevel level) const;
-
-  // Set the string inserted at the end of each text line. "\n" by default.
-  void SetLineSeparator(const char *new_line);
-
-  // Set the string inserted at the end of each paragraph. "\n" by default.
-  void SetParagraphSeparator(const char *new_para);
-
-  // Returns the mean confidence of the current object at the given level.
-  // The number should be interpreted as a percent probability. (0.0f-100.0f)
-  float Confidence(PageIteratorLevel level) const;
-
-  // ============= Functions that refer to words only ============.
-
-  // Returns the font attributes of the current word. If iterating at a higher
-  // level object than words, eg textlines, then this will return the
-  // attributes of the first word in that textline.
-  // The actual return value is a string representing a font name. It points
-  // to an internal table and SHOULD NOT BE DELETED. Lifespan is the same as
-  // the iterator itself, ie rendered invalid by various members of
-  // TessBaseAPI, including Init, SetImage, End or deleting the TessBaseAPI.
-  // Pointsize is returned in printers points (1/72 inch.)
-  const char *WordFontAttributes(bool *is_bold, bool *is_italic,
-                                 bool *is_underlined, bool *is_monospace,
-                                 bool *is_serif, bool *is_smallcaps,
-                                 int *pointsize, int *font_id) const;
-
-  // Return the name of the language used to recognize this word.
-  // On error, nullptr.  Do not delete this pointer.
-  const char *WordRecognitionLanguage() const;
-
-  // Return the overall directionality of this word.
-  StrongScriptDirection WordDirection() const;
-
-  // Returns true if the current word was found in a dictionary.
-  bool WordIsFromDictionary() const;
-
-  // Returns the number of blanks before the current word.
-  int BlanksBeforeWord() const;
-
-  // Returns true if the current word is numeric.
-  bool WordIsNumeric() const;
-
-  // Returns true if the word contains blamer information.
-  bool HasBlamerInfo() const;
-
-  // Returns the pointer to ParamsTrainingBundle stored in the BlamerBundle
-  // of the current word.
-  const void *GetParamsTrainingBundle() const;
-
-  // Returns a pointer to the string with blamer information for this word.
-  // Assumes that the word's blamer_bundle is not nullptr.
-  const char *GetBlamerDebug() const;
-
-  // Returns a pointer to the string with misadaption information for this word.
-  // Assumes that the word's blamer_bundle is not nullptr.
-  const char *GetBlamerMisadaptionDebug() const;
-
-  // Returns true if a truth string was recorded for the current word.
-  bool HasTruthString() const;
-
-  // Returns true if the given string is equivalent to the truth string for
-  // the current word.
-  bool EquivalentToTruth(const char *str) const;
-
-  // Returns a null terminated UTF-8 encoded truth string for the current word.
-  // Use delete [] to free after use.
-  char *WordTruthUTF8Text() const;
-
-  // Returns a null terminated UTF-8 encoded normalized OCR string for the
-  // current word. Use delete [] to free after use.
-  char *WordNormedUTF8Text() const;
-
-  // Returns a pointer to serialized choice lattice.
-  // Fills lattice_size with the number of bytes in lattice data.
-  const char *WordLattice(int *lattice_size) const;
-
-  // ============= Functions that refer to symbols only ============.
-
-  // Returns true if the current symbol is a superscript.
-  // If iterating at a higher level object than symbols, eg words, then
-  // this will return the attributes of the first symbol in that word.
-  bool SymbolIsSuperscript() const;
-  // Returns true if the current symbol is a subscript.
-  // If iterating at a higher level object than symbols, eg words, then
-  // this will return the attributes of the first symbol in that word.
-  bool SymbolIsSubscript() const;
-  // Returns true if the current symbol is a dropcap.
-  // If iterating at a higher level object than symbols, eg words, then
-  // this will return the attributes of the first symbol in that word.
-  bool SymbolIsDropcap() const;
-
-protected:
-  const char *line_separator_;
-  const char *paragraph_separator_;
-};
-
-// Class to iterate over the classifier choices for a single RIL_SYMBOL.
-class TESS_API ChoiceIterator {
-public:
-  // Construction is from a LTRResultIterator that points to the symbol of
-  // interest. The ChoiceIterator allows a one-shot iteration over the
-  // choices for this symbol and after that it is useless.
-  explicit ChoiceIterator(const LTRResultIterator &result_it);
-  ~ChoiceIterator();
-
-  // Moves to the next choice for the symbol and returns false if there
-  // are none left.
-  bool Next();
-
-  // ============= Accessing data ==============.
-
-  // Returns the null terminated UTF-8 encoded text string for the current
-  // choice.
-  // NOTE: Unlike LTRResultIterator::GetUTF8Text, the return points to an
-  // internal structure and should NOT be delete[]ed to free after use.
-  const char *GetUTF8Text() const;
-
-  // Returns the confidence of the current choice depending on the used language
-  // data. If only LSTM traineddata is used the value range is 0.0f - 1.0f. All
-  // choices for one symbol should roughly add up to 1.0f.
-  // If only traineddata of the legacy engine is used, the number should be
-  // interpreted as a percent probability. (0.0f-100.0f) In this case
-  // probabilities won't add up to 100. Each one stands on its own.
-  float Confidence() const;
-
-  // Returns a vector containing all timesteps, which belong to the currently
-  // selected symbol. A timestep is a vector containing pairs of symbols and
-  // floating point numbers. The number states the probability for the
-  // corresponding symbol.
-  std::vector<std::vector<std::pair<const char *, float>>> *Timesteps() const;
-
-private:
-  // clears the remaining spaces out of the results and adapt the probabilities
-  void filterSpaces();
-  // Pointer to the WERD_RES object owned by the API.
-  WERD_RES *word_res_;
-  // Iterator over the blob choices.
-  BLOB_CHOICE_IT *choice_it_;
-  std::vector<std::pair<const char *, float>> *LSTM_choices_ = nullptr;
-  std::vector<std::pair<const char *, float>>::iterator LSTM_choice_it_;
-
-  const int *tstep_index_;
-  // regulates the rating granularity
-  double rating_coefficient_;
-  // leading blanks
-  int blanks_before_word_;
-  // true when there is lstm engine related trained data
-  bool oemLSTM_;
-};
-
-} // namespace tesseract.
-
-#endif // TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H_
--- a/third_party/ocr/tesseract-ocr/kylin/amd64/include/tesseract/ocrclass.h
+++ b/third_party/ocr/tesseract-ocr/kylin/amd64/include/tesseract/ocrclass.h
@ -1,158 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-/**********************************************************************
- * File:        ocrclass.h
- * Description: Class definitions and constants for the OCR API.
- * Author:      Hewlett-Packard Co
- *
- * (C) Copyright 1996, Hewlett-Packard Co.
- ** Licensed under the Apache License, Version 2.0 (the "License");
- ** you may not use this file except in compliance with the License.
- ** You may obtain a copy of the License at
- ** http://www.apache.org/licenses/LICENSE-2.0
- ** Unless required by applicable law or agreed to in writing, software
- ** distributed under the License is distributed on an "AS IS" BASIS,
- ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- ** See the License for the specific language governing permissions and
- ** limitations under the License.
- *
- **********************************************************************/
-
-/**********************************************************************
- * This file contains typedefs for all the structures used by
- * the HP OCR interface.
- * The structures are designed to allow them to be used with any
- * structure alignment up to 8.
- **********************************************************************/
-
-#ifndef CCUTIL_OCRCLASS_H_
-#define CCUTIL_OCRCLASS_H_
-
-#include <chrono>
-#include <ctime>
-
-namespace tesseract {
-
-/**********************************************************************
- * EANYCODE_CHAR
- * Description of a single character. The character code is defined by
- * the character set of the current font.
- * Output text is sent as an array of these structures.
- * Spaces and line endings in the output are represented in the
- * structures of the surrounding characters. They are not directly
- * represented as characters.
- * The first character in a word has a positive value of blanks.
- * Missing information should be set to the defaults in the comments.
- * If word bounds are known, but not character bounds, then the top and
- * bottom of each character should be those of the word. The left of the
- * first and right of the last char in each word should be set. All other
- * lefts and rights should be set to -1.
- * If set, the values of right and bottom are left+width and top+height.
- * Most of the members come directly from the parameters to ocr_append_char.
- * The formatting member uses the enhancement parameter and combines the
- * line direction stuff into the top 3 bits.
- * The coding is 0=RL char, 1=LR char, 2=DR NL, 3=UL NL, 4=DR Para,
- * 5=UL Para, 6=TB char, 7=BT char. API users do not need to know what
- * the coding is, only that it is backwards compatible with the previous
- * version.
- **********************************************************************/
-
-struct EANYCODE_CHAR { /*single character */
-  // It should be noted that the format for char_code for version 2.0 and beyond
-  // is UTF8 which means that ASCII characters will come out as one structure
-  // but other characters will be returned in two or more instances of this
-  // structure with a single byte of the  UTF8 code in each, but each will have
-  // the same bounding box. Programs which want to handle languagues with
-  // different characters sets will need to handle extended characters
-  // appropriately, but *all* code needs to be prepared to receive UTF8 coded
-  // characters for characters such as bullet and fancy quotes.
-  uint16_t char_code; /*character itself */
-  int16_t left;       /*of char (-1) */
-  int16_t right;      /*of char (-1) */
-  int16_t top;        /*of char (-1) */
-  int16_t bottom;     /*of char (-1) */
-  int16_t font_index; /*what font (0) */
-  uint8_t confidence; /*0=perfect, 100=reject (0/100) */
-  uint8_t point_size; /*of char, 72=i inch, (10) */
-  int8_t blanks;      /*no of spaces before this char (1) */
-  uint8_t formatting; /*char formatting (0) */
-};
-
-/**********************************************************************
- * ETEXT_DESC
- * Description of the output of the OCR engine.
- * This structure is used as both a progress monitor and the final
- * output header, since it needs to be a valid progress monitor while
- * the OCR engine is storing its output to shared memory.
- * During progress, all the buffer info is -1.
- * Progress starts at 0 and increases to 100 during OCR. No other constraint.
- * Additionally the progress callback contains the bounding box of the word that
- * is currently being processed.
- * Every progress callback, the OCR engine must set ocr_alive to 1.
- * The HP side will set ocr_alive to 0. Repeated failure to reset
- * to 1 indicates that the OCR engine is dead.
- * If the cancel function is not null then it is called with the number of
- * user words found. If it returns true then operation is cancelled.
- **********************************************************************/
-class ETEXT_DESC;
-
-using CANCEL_FUNC = bool (*)(void *, int);
-using PROGRESS_FUNC = bool (*)(int, int, int, int, int);
-using PROGRESS_FUNC2 = bool (*)(ETEXT_DESC *, int, int, int, int);
-
-class ETEXT_DESC { // output header
-public:
-  int16_t count{0};    /// chars in this buffer(0)
-  int16_t progress{0}; /// percent complete increasing (0-100)
-  /** Progress monitor covers word recognition and it does not cover layout
-   * analysis.
-   * See Ray comment in https://github.com/tesseract-ocr/tesseract/pull/27 */
-  int8_t more_to_come{0};       /// true if not last
-  volatile int8_t ocr_alive{0}; /// ocr sets to 1, HP 0
-  int8_t err_code{0};           /// for errcode use
-  CANCEL_FUNC cancel{nullptr};  /// returns true to cancel
-  PROGRESS_FUNC progress_callback{
-      nullptr};                      /// called whenever progress increases
-  PROGRESS_FUNC2 progress_callback2; /// monitor-aware progress callback
-  void *cancel_this{nullptr};        /// this or other data for cancel
-  std::chrono::steady_clock::time_point end_time;
-  /// Time to stop. Expected to be set only
-  /// by call to set_deadline_msecs().
-  EANYCODE_CHAR text[1]{}; /// character data
-
-  ETEXT_DESC() : progress_callback2(&default_progress_func) {
-    end_time = std::chrono::time_point<std::chrono::steady_clock,
-                                       std::chrono::milliseconds>();
-  }
-
-  // Sets the end time to be deadline_msecs milliseconds from now.
-  void set_deadline_msecs(int32_t deadline_msecs) {
-    if (deadline_msecs > 0) {
-      end_time = std::chrono::steady_clock::now() +
-                 std::chrono::milliseconds(deadline_msecs);
-    }
-  }
-
-  // Returns false if we've not passed the end_time, or have not set a deadline.
-  bool deadline_exceeded() const {
-    if (end_time.time_since_epoch() ==
-        std::chrono::steady_clock::duration::zero()) {
-      return false;
-    }
-    auto now = std::chrono::steady_clock::now();
-    return (now > end_time);
-  }
-
-private:
-  static bool default_progress_func(ETEXT_DESC *ths, int left, int right,
-                                    int top, int bottom) {
-    if (ths->progress_callback != nullptr) {
-      return (*(ths->progress_callback))(ths->progress, left, right, top,
-                                         bottom);
-    }
-    return true;
-  }
-};
-
-} // namespace tesseract
-
-#endif // CCUTIL_OCRCLASS_H_
--- a/third_party/ocr/tesseract-ocr/kylin/amd64/include/tesseract/osdetect.h
+++ b/third_party/ocr/tesseract-ocr/kylin/amd64/include/tesseract/osdetect.h
@ -1,139 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// File:        osdetect.h
-// Description: Orientation and script detection.
-// Author:      Samuel Charron
-//              Ranjith Unnikrishnan
-//
-// (C) Copyright 2008, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef TESSERACT_CCMAIN_OSDETECT_H_
-#define TESSERACT_CCMAIN_OSDETECT_H_
-
-#include "export.h" // for TESS_API
-
-#include <vector> // for std::vector
-
-namespace tesseract {
-
-class BLOBNBOX;
-class BLOBNBOX_CLIST;
-class BLOB_CHOICE_LIST;
-class TO_BLOCK_LIST;
-class UNICHARSET;
-
-class Tesseract;
-
-// Max number of scripts in ICU + "NULL" + Japanese and Korean + Fraktur
-const int kMaxNumberOfScripts = 116 + 1 + 2 + 1;
-
-struct OSBestResult {
-  OSBestResult()
-      : orientation_id(0), script_id(0), sconfidence(0.0), oconfidence(0.0) {}
-  int orientation_id;
-  int script_id;
-  float sconfidence;
-  float oconfidence;
-};
-
-struct OSResults {
-  OSResults() : unicharset(nullptr) {
-    for (int i = 0; i < 4; ++i) {
-      for (int j = 0; j < kMaxNumberOfScripts; ++j) {
-        scripts_na[i][j] = 0;
-      }
-      orientations[i] = 0;
-    }
-  }
-  void update_best_orientation();
-  // Set the estimate of the orientation to the given id.
-  void set_best_orientation(int orientation_id);
-  // Update/Compute the best estimate of the script assuming the given
-  // orientation id.
-  void update_best_script(int orientation_id);
-  // Return the index of the script with the highest score for this orientation.
-  TESS_API int get_best_script(int orientation_id) const;
-  // Accumulate scores with given OSResults instance and update the best script.
-  void accumulate(const OSResults &osr);
-
-  // Print statistics.
-  void print_scores(void) const;
-  void print_scores(int orientation_id) const;
-
-  // Array holding scores for each orientation id [0,3].
-  // Orientation ids [0..3] map to [0, 270, 180, 90] degree orientations of the
-  // page respectively, where the values refer to the amount of clockwise
-  // rotation to be applied to the page for the text to be upright and readable.
-  float orientations[4];
-  // Script confidence scores for each of 4 possible orientations.
-  float scripts_na[4][kMaxNumberOfScripts];
-
-  UNICHARSET *unicharset;
-  OSBestResult best_result;
-};
-
-class OrientationDetector {
-public:
-  OrientationDetector(const std::vector<int> *allowed_scripts,
-                      OSResults *results);
-  bool detect_blob(BLOB_CHOICE_LIST *scores);
-  int get_orientation();
-
-private:
-  OSResults *osr_;
-  const std::vector<int> *allowed_scripts_;
-};
-
-class ScriptDetector {
-public:
-  ScriptDetector(const std::vector<int> *allowed_scripts, OSResults *osr,
-                 tesseract::Tesseract *tess);
-  void detect_blob(BLOB_CHOICE_LIST *scores);
-  bool must_stop(int orientation) const;
-
-private:
-  OSResults *osr_;
-  static const char *korean_script_;
-  static const char *japanese_script_;
-  static const char *fraktur_script_;
-  int korean_id_;
-  int japanese_id_;
-  int katakana_id_;
-  int hiragana_id_;
-  int han_id_;
-  int hangul_id_;
-  int latin_id_;
-  int fraktur_id_;
-  tesseract::Tesseract *tess_;
-  const std::vector<int> *allowed_scripts_;
-};
-
-int orientation_and_script_detection(const char *filename, OSResults *,
-                                     tesseract::Tesseract *);
-
-int os_detect(TO_BLOCK_LIST *port_blocks, OSResults *osr,
-              tesseract::Tesseract *tess);
-
-int os_detect_blobs(const std::vector<int> *allowed_scripts,
-                    BLOBNBOX_CLIST *blob_list, OSResults *osr,
-                    tesseract::Tesseract *tess);
-
-bool os_detect_blob(BLOBNBOX *bbox, OrientationDetector *o, ScriptDetector *s,
-                    OSResults *, tesseract::Tesseract *tess);
-
-// Helper method to convert an orientation index to its value in degrees.
-// The value represents the amount of clockwise rotation in degrees that must be
-// applied for the text to be upright (readable).
-TESS_API int OrientationIdToValue(const int &id);
-
-} // namespace tesseract
-
-#endif // TESSERACT_CCMAIN_OSDETECT_H_
--- a/third_party/ocr/tesseract-ocr/kylin/amd64/include/tesseract/pageiterator.h
+++ b/third_party/ocr/tesseract-ocr/kylin/amd64/include/tesseract/pageiterator.h
@ -1,364 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// File:        pageiterator.h
-// Description: Iterator for tesseract page structure that avoids using
-//              tesseract internal data structures.
-// Author:      Ray Smith
-//
-// (C) Copyright 2010, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef TESSERACT_CCMAIN_PAGEITERATOR_H_
-#define TESSERACT_CCMAIN_PAGEITERATOR_H_
-
-#include "export.h"
-#include "publictypes.h"
-
-struct Pix;
-struct Pta;
-
-namespace tesseract {
-
-struct BlamerBundle;
-class C_BLOB_IT;
-class PAGE_RES;
-class PAGE_RES_IT;
-class WERD;
-
-class Tesseract;
-
-/**
- * Class to iterate over tesseract page structure, providing access to all
- * levels of the page hierarchy, without including any tesseract headers or
- * having to handle any tesseract structures.
- * WARNING! This class points to data held within the TessBaseAPI class, and
- * therefore can only be used while the TessBaseAPI class still exists and
- * has not been subjected to a call of Init, SetImage, Recognize, Clear, End
- * DetectOS, or anything else that changes the internal PAGE_RES.
- * See tesseract/publictypes.h for the definition of PageIteratorLevel.
- * See also ResultIterator, derived from PageIterator, which adds in the
- * ability to access OCR output with text-specific methods.
- */
-
-class TESS_API PageIterator {
-public:
-  /**
-   * page_res and tesseract come directly from the BaseAPI.
-   * The rectangle parameters are copied indirectly from the Thresholder,
-   * via the BaseAPI. They represent the coordinates of some rectangle in an
-   * original image (in top-left-origin coordinates) and therefore the top-left
-   * needs to be added to any output boxes in order to specify coordinates
-   * in the original image. See TessBaseAPI::SetRectangle.
-   * The scale and scaled_yres are in case the Thresholder scaled the image
-   * rectangle prior to thresholding. Any coordinates in tesseract's image
-   * must be divided by scale before adding (rect_left, rect_top).
-   * The scaled_yres indicates the effective resolution of the binary image
-   * that tesseract has been given by the Thresholder.
-   * After the constructor, Begin has already been called.
-   */
-  PageIterator(PAGE_RES *page_res, Tesseract *tesseract, int scale,
-               int scaled_yres, int rect_left, int rect_top, int rect_width,
-               int rect_height);
-  virtual ~PageIterator();
-
-  /**
-   * Page/ResultIterators may be copied! This makes it possible to iterate over
-   * all the objects at a lower level, while maintaining an iterator to
-   * objects at a higher level. These constructors DO NOT CALL Begin, so
-   * iterations will continue from the location of src.
-   */
-  PageIterator(const PageIterator &src);
-  const PageIterator &operator=(const PageIterator &src);
-
-  /** Are we positioned at the same location as other? */
-  bool PositionedAtSameWord(const PAGE_RES_IT *other) const;
-
-  // ============= Moving around within the page ============.
-
-  /**
-   * Moves the iterator to point to the start of the page to begin an
-   * iteration.
-   */
-  virtual void Begin();
-
-  /**
-   * Moves the iterator to the beginning of the paragraph.
-   * This class implements this functionality by moving it to the zero indexed
-   * blob of the first (leftmost) word on the first row of the paragraph.
-   */
-  virtual void RestartParagraph();
-
-  /**
-   * Return whether this iterator points anywhere in the first textline of a
-   * paragraph.
-   */
-  bool IsWithinFirstTextlineOfParagraph() const;
-
-  /**
-   * Moves the iterator to the beginning of the text line.
-   * This class implements this functionality by moving it to the zero indexed
-   * blob of the first (leftmost) word of the row.
-   */
-  virtual void RestartRow();
-
-  /**
-   * Moves to the start of the next object at the given level in the
-   * page hierarchy, and returns false if the end of the page was reached.
-   * NOTE that RIL_SYMBOL will skip non-text blocks, but all other
-   * PageIteratorLevel level values will visit each non-text block once.
-   * Think of non text blocks as containing a single para, with a single line,
-   * with a single imaginary word.
-   * Calls to Next with different levels may be freely intermixed.
-   * This function iterates words in right-to-left scripts correctly, if
-   * the appropriate language has been loaded into Tesseract.
-   */
-  virtual bool Next(PageIteratorLevel level);
-
-  /**
-   * Returns true if the iterator is at the start of an object at the given
-   * level.
-   *
-   * For instance, suppose an iterator it is pointed to the first symbol of the
-   * first word of the third line of the second paragraph of the first block in
-   * a page, then:
-   *   it.IsAtBeginningOf(RIL_BLOCK) = false
-   *   it.IsAtBeginningOf(RIL_PARA) = false
-   *   it.IsAtBeginningOf(RIL_TEXTLINE) = true
-   *   it.IsAtBeginningOf(RIL_WORD) = true
-   *   it.IsAtBeginningOf(RIL_SYMBOL) = true
-   */
-  virtual bool IsAtBeginningOf(PageIteratorLevel level) const;
-
-  /**
-   * Returns whether the iterator is positioned at the last element in a
-   * given level. (e.g. the last word in a line, the last line in a block)
-   *
-   *     Here's some two-paragraph example
-   *   text.  It starts off innocuously
-   *   enough but quickly turns bizarre.
-   *     The author inserts a cornucopia
-   *   of words to guard against confused
-   *   references.
-   *
-   * Now take an iterator it pointed to the start of "bizarre."
-   *  it.IsAtFinalElement(RIL_PARA, RIL_SYMBOL) = false
-   *  it.IsAtFinalElement(RIL_PARA, RIL_WORD) = true
-   *  it.IsAtFinalElement(RIL_BLOCK, RIL_WORD) = false
-   */
-  virtual bool IsAtFinalElement(PageIteratorLevel level,
-                                PageIteratorLevel element) const;
-
-  /**
-   * Returns whether this iterator is positioned
-   *   before other:   -1
-   *   equal to other:  0
-   *   after other:     1
-   */
-  int Cmp(const PageIterator &other) const;
-
-  // ============= Accessing data ==============.
-  // Coordinate system:
-  // Integer coordinates are at the cracks between the pixels.
-  // The top-left corner of the top-left pixel in the image is at (0,0).
-  // The bottom-right corner of the bottom-right pixel in the image is at
-  // (width, height).
-  // Every bounding box goes from the top-left of the top-left contained
-  // pixel to the bottom-right of the bottom-right contained pixel, so
-  // the bounding box of the single top-left pixel in the image is:
-  // (0,0)->(1,1).
-  // If an image rectangle has been set in the API, then returned coordinates
-  // relate to the original (full) image, rather than the rectangle.
-
-  /**
-   * Controls what to include in a bounding box. Bounding boxes of all levels
-   * between RIL_WORD and RIL_BLOCK can include or exclude potential diacritics.
-   * Between layout analysis and recognition, it isn't known where all
-   * diacritics belong, so this control is used to include or exclude some
-   * diacritics that are above or below the main body of the word. In most cases
-   * where the placement is obvious, and after recognition, it doesn't make as
-   * much difference, as the diacritics will already be included in the word.
-   */
-  void SetBoundingBoxComponents(bool include_upper_dots,
-                                bool include_lower_dots) {
-    include_upper_dots_ = include_upper_dots;
-    include_lower_dots_ = include_lower_dots;
-  }
-
-  /**
-   * Returns the bounding rectangle of the current object at the given level.
-   * See comment on coordinate system above.
-   * Returns false if there is no such object at the current position.
-   * The returned bounding box is guaranteed to match the size and position
-   * of the image returned by GetBinaryImage, but may clip foreground pixels
-   * from a grey image. The padding argument to GetImage can be used to expand
-   * the image to include more foreground pixels. See GetImage below.
-   */
-  bool BoundingBox(PageIteratorLevel level, int *left, int *top, int *right,
-                   int *bottom) const;
-  bool BoundingBox(PageIteratorLevel level, int padding, int *left, int *top,
-                   int *right, int *bottom) const;
-  /**
-   * Returns the bounding rectangle of the object in a coordinate system of the
-   * working image rectangle having its origin at (rect_left_, rect_top_) with
-   * respect to the original image and is scaled by a factor scale_.
-   */
-  bool BoundingBoxInternal(PageIteratorLevel level, int *left, int *top,
-                           int *right, int *bottom) const;
-
-  /** Returns whether there is no object of a given level. */
-  bool Empty(PageIteratorLevel level) const;
-
-  /**
-   * Returns the type of the current block.
-   * See tesseract/publictypes.h for PolyBlockType.
-   */
-  PolyBlockType BlockType() const;
-
-  /**
-   * Returns the polygon outline of the current block. The returned Pta must
-   * be ptaDestroy-ed after use. Note that the returned Pta lists the vertices
-   * of the polygon, and the last edge is the line segment between the last
-   * point and the first point. nullptr will be returned if the iterator is
-   * at the end of the document or layout analysis was not used.
-   */
-  Pta *BlockPolygon() const;
-
-  /**
-   * Returns a binary image of the current object at the given level.
-   * The position and size match the return from BoundingBoxInternal, and so
-   * this could be upscaled with respect to the original input image.
-   * Use pixDestroy to delete the image after use.
-   */
-  Pix *GetBinaryImage(PageIteratorLevel level) const;
-
-  /**
-   * Returns an image of the current object at the given level in greyscale
-   * if available in the input. To guarantee a binary image use BinaryImage.
-   * NOTE that in order to give the best possible image, the bounds are
-   * expanded slightly over the binary connected component, by the supplied
-   * padding, so the top-left position of the returned image is returned
-   * in (left,top). These will most likely not match the coordinates
-   * returned by BoundingBox.
-   * If you do not supply an original image, you will get a binary one.
-   * Use pixDestroy to delete the image after use.
-   */
-  Pix *GetImage(PageIteratorLevel level, int padding, Pix *original_img,
-                int *left, int *top) const;
-
-  /**
-   * Returns the baseline of the current object at the given level.
-   * The baseline is the line that passes through (x1, y1) and (x2, y2).
-   * WARNING: with vertical text, baselines may be vertical!
-   * Returns false if there is no baseline at the current position.
-   */
-  bool Baseline(PageIteratorLevel level, int *x1, int *y1, int *x2,
-                int *y2) const;
-
-  // Returns the attributes of the current row.
-  void RowAttributes(float *row_height, float *descenders,
-                     float *ascenders) const;
-
-  /**
-   * Returns orientation for the block the iterator points to.
-   *   orientation, writing_direction, textline_order: see publictypes.h
-   *   deskew_angle: after rotating the block so the text orientation is
-   *                 upright, how many radians does one have to rotate the
-   *                 block anti-clockwise for it to be level?
-   *                   -Pi/4 <= deskew_angle <= Pi/4
-   */
-  void Orientation(tesseract::Orientation *orientation,
-                   tesseract::WritingDirection *writing_direction,
-                   tesseract::TextlineOrder *textline_order,
-                   float *deskew_angle) const;
-
-  /**
-   * Returns information about the current paragraph, if available.
-   *
-   *   justification -
-   *     LEFT if ragged right, or fully justified and script is left-to-right.
-   *     RIGHT if ragged left, or fully justified and script is right-to-left.
-   *     unknown if it looks like source code or we have very few lines.
-   *   is_list_item -
-   *     true if we believe this is a member of an ordered or unordered list.
-   *   is_crown -
-   *     true if the first line of the paragraph is aligned with the other
-   *     lines of the paragraph even though subsequent paragraphs have first
-   *     line indents.  This typically indicates that this is the continuation
-   *     of a previous paragraph or that it is the very first paragraph in
-   *     the chapter.
-   *   first_line_indent -
-   *     For LEFT aligned paragraphs, the first text line of paragraphs of
-   *     this kind are indented this many pixels from the left edge of the
-   *     rest of the paragraph.
-   *     for RIGHT aligned paragraphs, the first text line of paragraphs of
-   *     this kind are indented this many pixels from the right edge of the
-   *     rest of the paragraph.
-   *     NOTE 1: This value may be negative.
-   *     NOTE 2: if *is_crown == true, the first line of this paragraph is
-   *             actually flush, and first_line_indent is set to the "common"
-   *             first_line_indent for subsequent paragraphs in this block
-   *             of text.
-   */
-  void ParagraphInfo(tesseract::ParagraphJustification *justification,
-                     bool *is_list_item, bool *is_crown,
-                     int *first_line_indent) const;
-
-  // If the current WERD_RES (it_->word()) is not nullptr, sets the BlamerBundle
-  // of the current word to the given pointer (takes ownership of the pointer)
-  // and returns true.
-  // Can only be used when iterating on the word level.
-  bool SetWordBlamerBundle(BlamerBundle *blamer_bundle);
-
-protected:
-  /**
-   * Sets up the internal data for iterating the blobs of a new word, then
-   * moves the iterator to the given offset.
-   */
-  void BeginWord(int offset);
-
-  /** Pointer to the page_res owned by the API. */
-  PAGE_RES *page_res_;
-  /** Pointer to the Tesseract object owned by the API. */
-  Tesseract *tesseract_;
-  /**
-   * The iterator to the page_res_. Owned by this ResultIterator.
-   * A pointer just to avoid dragging in Tesseract includes.
-   */
-  PAGE_RES_IT *it_;
-  /**
-   * The current input WERD being iterated. If there is an output from OCR,
-   * then word_ is nullptr. Owned by the API
-   */
-  WERD *word_;
-  /** The length of the current word_. */
-  int word_length_;
-  /** The current blob index within the word. */
-  int blob_index_;
-  /**
-   * Iterator to the blobs within the word. If nullptr, then we are iterating
-   * OCR results in the box_word.
-   * Owned by this ResultIterator.
-   */
-  C_BLOB_IT *cblob_it_;
-  /** Control over what to include in bounding boxes. */
-  bool include_upper_dots_;
-  bool include_lower_dots_;
-  /** Parameters saved from the Thresholder. Needed to rebuild coordinates.*/
-  int scale_;
-  int scaled_yres_;
-  int rect_left_;
-  int rect_top_;
-  int rect_width_;
-  int rect_height_;
-};
-
-} // namespace tesseract.
-
-#endif // TESSERACT_CCMAIN_PAGEITERATOR_H_
--- a/third_party/ocr/tesseract-ocr/kylin/amd64/include/tesseract/publictypes.h
+++ b/third_party/ocr/tesseract-ocr/kylin/amd64/include/tesseract/publictypes.h
@ -1,281 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// File:        publictypes.h
-// Description: Types used in both the API and internally
-// Author:      Ray Smith
-//
-// (C) Copyright 2010, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef TESSERACT_CCSTRUCT_PUBLICTYPES_H_
-#define TESSERACT_CCSTRUCT_PUBLICTYPES_H_
-
-namespace tesseract {
-
-// This file contains types that are used both by the API and internally
-// to Tesseract. In order to decouple the API from Tesseract and prevent cyclic
-// dependencies, THIS FILE SHOULD NOT DEPEND ON ANY OTHER PART OF TESSERACT.
-// Restated: It is OK for low-level Tesseract files to include publictypes.h,
-// but not for the low-level tesseract code to include top-level API code.
-// This file should not use other Tesseract types, as that would drag
-// their includes into the API-level.
-
-/** Number of printers' points in an inch. The unit of the pointsize return. */
-constexpr int kPointsPerInch = 72;
-/**
- * Minimum believable resolution. Used as a default if there is no other
- * information, as it is safer to under-estimate than over-estimate.
- */
-constexpr int kMinCredibleResolution = 70;
-/** Maximum believable resolution.  */
-constexpr int kMaxCredibleResolution = 2400;
-/**
- * Ratio between median blob size and likely resolution. Used to estimate
- * resolution when none is provided. This is basically 1/usual text size in
- * inches.  */
-constexpr int kResolutionEstimationFactor = 10;
-
-/**
- * Possible types for a POLY_BLOCK or ColPartition.
- * Must be kept in sync with kPBColors in polyblk.cpp and PTIs*Type functions
- * below, as well as kPolyBlockNames in layout_test.cc.
- * Used extensively by ColPartition, and POLY_BLOCK.
- */
-enum PolyBlockType {
-  PT_UNKNOWN,         // Type is not yet known. Keep as the first element.
-  PT_FLOWING_TEXT,    // Text that lives inside a column.
-  PT_HEADING_TEXT,    // Text that spans more than one column.
-  PT_PULLOUT_TEXT,    // Text that is in a cross-column pull-out region.
-  PT_EQUATION,        // Partition belonging to an equation region.
-  PT_INLINE_EQUATION, // Partition has inline equation.
-  PT_TABLE,           // Partition belonging to a table region.
-  PT_VERTICAL_TEXT,   // Text-line runs vertically.
-  PT_CAPTION_TEXT,    // Text that belongs to an image.
-  PT_FLOWING_IMAGE,   // Image that lives inside a column.
-  PT_HEADING_IMAGE,   // Image that spans more than one column.
-  PT_PULLOUT_IMAGE,   // Image that is in a cross-column pull-out region.
-  PT_HORZ_LINE,       // Horizontal Line.
-  PT_VERT_LINE,       // Vertical Line.
-  PT_NOISE,           // Lies outside of any column.
-  PT_COUNT
-};
-
-/** Returns true if PolyBlockType is of horizontal line type */
-inline bool PTIsLineType(PolyBlockType type) {
-  return type == PT_HORZ_LINE || type == PT_VERT_LINE;
-}
-/** Returns true if PolyBlockType is of image type */
-inline bool PTIsImageType(PolyBlockType type) {
-  return type == PT_FLOWING_IMAGE || type == PT_HEADING_IMAGE ||
-         type == PT_PULLOUT_IMAGE;
-}
-/** Returns true if PolyBlockType is of text type */
-inline bool PTIsTextType(PolyBlockType type) {
-  return type == PT_FLOWING_TEXT || type == PT_HEADING_TEXT ||
-         type == PT_PULLOUT_TEXT || type == PT_TABLE ||
-         type == PT_VERTICAL_TEXT || type == PT_CAPTION_TEXT ||
-         type == PT_INLINE_EQUATION;
-}
-// Returns true if PolyBlockType is of pullout(inter-column) type
-inline bool PTIsPulloutType(PolyBlockType type) {
-  return type == PT_PULLOUT_IMAGE || type == PT_PULLOUT_TEXT;
-}
-
-/**
- *  +------------------+  Orientation Example:
- *  | 1 Aaaa Aaaa Aaaa |  ====================
- *  | Aaa aa aaa aa    |  To left is a diagram of some (1) English and
- *  | aaaaaa A aa aaa. |  (2) Chinese text and a (3) photo credit.
- *  |                2 |
- *  |   #######  c c C |  Upright Latin characters are represented as A and a.
- *  |   #######  c c c |  '<' represents a latin character rotated
- *  | < #######  c c c |      anti-clockwise 90 degrees.
- *  | < #######  c   c |
- *  | < #######  .   c |  Upright Chinese characters are represented C and c.
- *  | 3 #######      c |
- *  +------------------+  NOTA BENE: enum values here should match goodoc.proto
-
- * If you orient your head so that "up" aligns with Orientation,
- * then the characters will appear "right side up" and readable.
- *
- * In the example above, both the English and Chinese paragraphs are oriented
- * so their "up" is the top of the page (page up).  The photo credit is read
- * with one's head turned leftward ("up" is to page left).
- *
- * The values of this enum match the convention of Tesseract's osdetect.h
-*/
-enum Orientation {
-  ORIENTATION_PAGE_UP = 0,
-  ORIENTATION_PAGE_RIGHT = 1,
-  ORIENTATION_PAGE_DOWN = 2,
-  ORIENTATION_PAGE_LEFT = 3,
-};
-
-/**
- * The grapheme clusters within a line of text are laid out logically
- * in this direction, judged when looking at the text line rotated so that
- * its Orientation is "page up".
- *
- * For English text, the writing direction is left-to-right.  For the
- * Chinese text in the above example, the writing direction is top-to-bottom.
- */
-enum WritingDirection {
-  WRITING_DIRECTION_LEFT_TO_RIGHT = 0,
-  WRITING_DIRECTION_RIGHT_TO_LEFT = 1,
-  WRITING_DIRECTION_TOP_TO_BOTTOM = 2,
-};
-
-/**
- * The text lines are read in the given sequence.
- *
- * In English, the order is top-to-bottom.
- * In Chinese, vertical text lines are read right-to-left.  Mongolian is
- * written in vertical columns top to bottom like Chinese, but the lines
- * order left-to right.
- *
- * Note that only some combinations make sense.  For example,
- * WRITING_DIRECTION_LEFT_TO_RIGHT implies TEXTLINE_ORDER_TOP_TO_BOTTOM
- */
-enum TextlineOrder {
-  TEXTLINE_ORDER_LEFT_TO_RIGHT = 0,
-  TEXTLINE_ORDER_RIGHT_TO_LEFT = 1,
-  TEXTLINE_ORDER_TOP_TO_BOTTOM = 2,
-};
-
-/**
- * Possible modes for page layout analysis. These *must* be kept in order
- * of decreasing amount of layout analysis to be done, except for OSD_ONLY,
- * so that the inequality test macros below work.
- */
-enum PageSegMode {
-  PSM_OSD_ONLY = 0,      ///< Orientation and script detection only.
-  PSM_AUTO_OSD = 1,      ///< Automatic page segmentation with orientation and
-                         ///< script detection. (OSD)
-  PSM_AUTO_ONLY = 2,     ///< Automatic page segmentation, but no OSD, or OCR.
-  PSM_AUTO = 3,          ///< Fully automatic page segmentation, but no OSD.
-  PSM_SINGLE_COLUMN = 4, ///< Assume a single column of text of variable sizes.
-  PSM_SINGLE_BLOCK_VERT_TEXT = 5, ///< Assume a single uniform block of
-                                  ///< vertically aligned text.
-  PSM_SINGLE_BLOCK = 6, ///< Assume a single uniform block of text. (Default.)
-  PSM_SINGLE_LINE = 7,  ///< Treat the image as a single text line.
-  PSM_SINGLE_WORD = 8,  ///< Treat the image as a single word.
-  PSM_CIRCLE_WORD = 9,  ///< Treat the image as a single word in a circle.
-  PSM_SINGLE_CHAR = 10, ///< Treat the image as a single character.
-  PSM_SPARSE_TEXT =
-      11, ///< Find as much text as possible in no particular order.
-  PSM_SPARSE_TEXT_OSD = 12, ///< Sparse text with orientation and script det.
-  PSM_RAW_LINE = 13, ///< Treat the image as a single text line, bypassing
-                     ///< hacks that are Tesseract-specific.
-
-  PSM_COUNT ///< Number of enum entries.
-};
-
-/**
- * Inline functions that act on a PageSegMode to determine whether components of
- * layout analysis are enabled.
- * *Depend critically on the order of elements of PageSegMode.*
- * NOTE that arg is an int for compatibility with INT_PARAM.
- */
-inline bool PSM_OSD_ENABLED(int pageseg_mode) {
-  return pageseg_mode <= PSM_AUTO_OSD || pageseg_mode == PSM_SPARSE_TEXT_OSD;
-}
-inline bool PSM_ORIENTATION_ENABLED(int pageseg_mode) {
-  return pageseg_mode <= PSM_AUTO || pageseg_mode == PSM_SPARSE_TEXT_OSD;
-}
-inline bool PSM_COL_FIND_ENABLED(int pageseg_mode) {
-  return pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_AUTO;
-}
-inline bool PSM_SPARSE(int pageseg_mode) {
-  return pageseg_mode == PSM_SPARSE_TEXT || pageseg_mode == PSM_SPARSE_TEXT_OSD;
-}
-inline bool PSM_BLOCK_FIND_ENABLED(int pageseg_mode) {
-  return pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_SINGLE_COLUMN;
-}
-inline bool PSM_LINE_FIND_ENABLED(int pageseg_mode) {
-  return pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_SINGLE_BLOCK;
-}
-inline bool PSM_WORD_FIND_ENABLED(int pageseg_mode) {
-  return (pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_SINGLE_LINE) ||
-         pageseg_mode == PSM_SPARSE_TEXT || pageseg_mode == PSM_SPARSE_TEXT_OSD;
-}
-
-/**
- * enum of the elements of the page hierarchy, used in ResultIterator
- * to provide functions that operate on each level without having to
- * have 5x as many functions.
- */
-enum PageIteratorLevel {
-  RIL_BLOCK,    // Block of text/image/separator line.
-  RIL_PARA,     // Paragraph within a block.
-  RIL_TEXTLINE, // Line within a paragraph.
-  RIL_WORD,     // Word within a textline.
-  RIL_SYMBOL    // Symbol/character within a word.
-};
-
-/**
- * JUSTIFICATION_UNKNOWN
- *   The alignment is not clearly one of the other options.  This could happen
- *   for example if there are only one or two lines of text or the text looks
- *   like source code or poetry.
- *
- * NOTA BENE: Fully justified paragraphs (text aligned to both left and right
- *    margins) are marked by Tesseract with JUSTIFICATION_LEFT if their text
- *    is written with a left-to-right script and with JUSTIFICATION_RIGHT if
- *    their text is written in a right-to-left script.
- *
- * Interpretation for text read in vertical lines:
- *   "Left" is wherever the starting reading position is.
- *
- * JUSTIFICATION_LEFT
- *   Each line, except possibly the first, is flush to the same left tab stop.
- *
- * JUSTIFICATION_CENTER
- *   The text lines of the paragraph are centered about a line going
- *   down through their middle of the text lines.
- *
- * JUSTIFICATION_RIGHT
- *   Each line, except possibly the first, is flush to the same right tab stop.
- */
-enum ParagraphJustification {
-  JUSTIFICATION_UNKNOWN,
-  JUSTIFICATION_LEFT,
-  JUSTIFICATION_CENTER,
-  JUSTIFICATION_RIGHT,
-};
-
-/**
- * When Tesseract/Cube is initialized we can choose to instantiate/load/run
- * only the Tesseract part, only the Cube part or both along with the combiner.
- * The preference of which engine to use is stored in tessedit_ocr_engine_mode.
- *
- * ATTENTION: When modifying this enum, please make sure to make the
- * appropriate changes to all the enums mirroring it (e.g. OCREngine in
- * cityblock/workflow/detection/detection_storage.proto). Such enums will
- * mention the connection to OcrEngineMode in the comments.
- */
-enum OcrEngineMode {
-  OEM_TESSERACT_ONLY,          // Run Tesseract only - fastest; deprecated
-  OEM_LSTM_ONLY,               // Run just the LSTM line recognizer.
-  OEM_TESSERACT_LSTM_COMBINED, // Run the LSTM recognizer, but allow fallback
-                               // to Tesseract when things get difficult.
-                               // deprecated
-  OEM_DEFAULT,                 // Specify this mode when calling init_*(),
-                               // to indicate that any of the above modes
-                               // should be automatically inferred from the
-                               // variables in the language-specific config,
-                               // command-line configs, or if not specified
-                               // in any of the above should be set to the
-                               // default OEM_TESSERACT_ONLY.
-  OEM_COUNT                    // Number of OEMs
-};
-
-} // namespace tesseract.
-
-#endif // TESSERACT_CCSTRUCT_PUBLICTYPES_H_
--- a/third_party/ocr/tesseract-ocr/kylin/amd64/include/tesseract/renderer.h
+++ b/third_party/ocr/tesseract-ocr/kylin/amd64/include/tesseract/renderer.h
@ -1,311 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// File:        renderer.h
-// Description: Rendering interface to inject into TessBaseAPI
-//
-// (C) Copyright 2011, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef TESSERACT_API_RENDERER_H_
-#define TESSERACT_API_RENDERER_H_
-
-#include "export.h"
-
-// To avoid collision with other typenames include the ABSOLUTE MINIMUM
-// complexity of includes here. Use forward declarations wherever possible
-// and hide includes of complex types in baseapi.cpp.
-#include <cstdint>
-#include <string> // for std::string
-#include <vector> // for std::vector
-
-struct Pix;
-
-namespace tesseract {
-
-class TessBaseAPI;
-
-/**
- * Interface for rendering tesseract results into a document, such as text,
- * HOCR or pdf. This class is abstract. Specific classes handle individual
- * formats. This interface is then used to inject the renderer class into
- * tesseract when processing images.
- *
- * For simplicity implementing this with tesseract version 3.01,
- * the renderer contains document state that is cleared from document
- * to document just as the TessBaseAPI is. This way the base API can just
- * delegate its rendering functionality to injected renderers, and the
- * renderers can manage the associated state needed for the specific formats
- * in addition to the heuristics for producing it.
- */
-class TESS_API TessResultRenderer {
-public:
-  virtual ~TessResultRenderer();
-
-  // Takes ownership of pointer so must be new'd instance.
-  // Renderers aren't ordered, but appends the sequences of next parameter
-  // and existing next(). The renderers should be unique across both lists.
-  void insert(TessResultRenderer *next);
-
-  // Returns the next renderer or nullptr.
-  TessResultRenderer *next() {
-    return next_;
-  }
-
-  /**
-   * Starts a new document with the given title.
-   * This clears the contents of the output data.
-   * Title should use UTF-8 encoding.
-   */
-  bool BeginDocument(const char *title);
-
-  /**
-   * Adds the recognized text from the source image to the current document.
-   * Invalid if BeginDocument not yet called.
-   *
-   * Note that this API is a bit weird but is designed to fit into the
-   * current TessBaseAPI implementation where the api has lots of state
-   * information that we might want to add in.
-   */
-  bool AddImage(TessBaseAPI *api);
-
-  /**
-   * Finishes the document and finalizes the output data
-   * Invalid if BeginDocument not yet called.
-   */
-  bool EndDocument();
-
-  const char *file_extension() const {
-    return file_extension_;
-  }
-  const char *title() const {
-    return title_.c_str();
-  }
-
-  // Is everything fine? Otherwise something went wrong.
-  bool happy() const {
-    return happy_;
-  }
-
-  /**
-   * Returns the index of the last image given to AddImage
-   * (i.e. images are incremented whether the image succeeded or not)
-   *
-   * This is always defined. It means either the number of the
-   * current image, the last image ended, or in the completed document
-   * depending on when in the document lifecycle you are looking at it.
-   * Will return -1 if a document was never started.
-   */
-  int imagenum() const {
-    return imagenum_;
-  }
-
-protected:
-  /**
-   * Called by concrete classes.
-   *
-   * outputbase is the name of the output file excluding
-   * extension. For example, "/path/to/chocolate-chip-cookie-recipe"
-   *
-   * extension indicates the file extension to be used for output
-   * files. For example "pdf" will produce a .pdf file, and "hocr"
-   * will produce .hocr files.
-   */
-  TessResultRenderer(const char *outputbase, const char *extension);
-
-  // Hook for specialized handling in BeginDocument()
-  virtual bool BeginDocumentHandler();
-
-  // This must be overridden to render the OCR'd results
-  virtual bool AddImageHandler(TessBaseAPI *api) = 0;
-
-  // Hook for specialized handling in EndDocument()
-  virtual bool EndDocumentHandler();
-
-  // Renderers can call this to append '\0' terminated strings into
-  // the output string returned by GetOutput.
-  // This method will grow the output buffer if needed.
-  void AppendString(const char *s);
-
-  // Renderers can call this to append binary byte sequences into
-  // the output string returned by GetOutput. Note that s is not necessarily
-  // '\0' terminated (and can contain '\0' within it).
-  // This method will grow the output buffer if needed.
-  void AppendData(const char *s, int len);
-
-private:
-  TessResultRenderer *next_;   // Can link multiple renderers together
-  FILE *fout_;                 // output file pointer
-  const char *file_extension_; // standard extension for generated output
-  std::string title_;          // title of document being rendered
-  int imagenum_;               // index of last image added
-  bool happy_;                 // I get grumpy when the disk fills up, etc.
-};
-
-/**
- * Renders tesseract output into a plain UTF-8 text string
- */
-class TESS_API TessTextRenderer : public TessResultRenderer {
-public:
-  explicit TessTextRenderer(const char *outputbase);
-
-protected:
-  bool AddImageHandler(TessBaseAPI *api) override;
-};
-
-/**
- * Renders tesseract output into an hocr text string
- */
-class TESS_API TessHOcrRenderer : public TessResultRenderer {
-public:
-  explicit TessHOcrRenderer(const char *outputbase, bool font_info);
-  explicit TessHOcrRenderer(const char *outputbase);
-
-protected:
-  bool BeginDocumentHandler() override;
-  bool AddImageHandler(TessBaseAPI *api) override;
-  bool EndDocumentHandler() override;
-
-private:
-  bool font_info_; // whether to print font information
-};
-
-/**
- * Renders tesseract output into an alto text string
- */
-class TESS_API TessAltoRenderer : public TessResultRenderer {
-public:
-  explicit TessAltoRenderer(const char *outputbase);
-
-protected:
-  bool BeginDocumentHandler() override;
-  bool AddImageHandler(TessBaseAPI *api) override;
-  bool EndDocumentHandler() override;
-
-private:
-  bool begin_document;
-};
-
-/**
- * Renders Tesseract output into a TSV string
- */
-class TESS_API TessTsvRenderer : public TessResultRenderer {
-public:
-  explicit TessTsvRenderer(const char *outputbase, bool font_info);
-  explicit TessTsvRenderer(const char *outputbase);
-
-protected:
-  bool BeginDocumentHandler() override;
-  bool AddImageHandler(TessBaseAPI *api) override;
-  bool EndDocumentHandler() override;
-
-private:
-  bool font_info_; // whether to print font information
-};
-
-/**
- * Renders tesseract output into searchable PDF
- */
-class TESS_API TessPDFRenderer : public TessResultRenderer {
-public:
-  // datadir is the location of the TESSDATA. We need it because
-  // we load a custom PDF font from this location.
-  TessPDFRenderer(const char *outputbase, const char *datadir,
-                  bool textonly = false);
-
-protected:
-  bool BeginDocumentHandler() override;
-  bool AddImageHandler(TessBaseAPI *api) override;
-  bool EndDocumentHandler() override;
-
-private:
-  // We don't want to have every image in memory at once,
-  // so we store some metadata as we go along producing
-  // PDFs one page at a time. At the end, that metadata is
-  // used to make everything that isn't easily handled in a
-  // streaming fashion.
-  long int obj_;                  // counter for PDF objects
-  std::vector<uint64_t> offsets_; // offset of every PDF object in bytes
-  std::vector<long int> pages_;   // object number for every /Page object
-  std::string datadir_;           // where to find the custom font
-  bool textonly_;                 // skip images if set
-  // Bookkeeping only. DIY = Do It Yourself.
-  void AppendPDFObjectDIY(size_t objectsize);
-  // Bookkeeping + emit data.
-  void AppendPDFObject(const char *data);
-  // Create the /Contents object for an entire page.
-  char *GetPDFTextObjects(TessBaseAPI *api, double width, double height);
-  // Turn an image into a PDF object. Only transcode if we have to.
-  static bool imageToPDFObj(Pix *pix, const char *filename, long int objnum,
-                            char **pdf_object, long int *pdf_object_size,
-                            int jpg_quality);
-};
-
-/**
- * Renders tesseract output into a plain UTF-8 text string
- */
-class TESS_API TessUnlvRenderer : public TessResultRenderer {
-public:
-  explicit TessUnlvRenderer(const char *outputbase);
-
-protected:
-  bool AddImageHandler(TessBaseAPI *api) override;
-};
-
-/**
- * Renders tesseract output into a plain UTF-8 text string for LSTMBox
- */
-class TESS_API TessLSTMBoxRenderer : public TessResultRenderer {
-public:
-  explicit TessLSTMBoxRenderer(const char *outputbase);
-
-protected:
-  bool AddImageHandler(TessBaseAPI *api) override;
-};
-
-/**
- * Renders tesseract output into a plain UTF-8 text string
- */
-class TESS_API TessBoxTextRenderer : public TessResultRenderer {
-public:
-  explicit TessBoxTextRenderer(const char *outputbase);
-
-protected:
-  bool AddImageHandler(TessBaseAPI *api) override;
-};
-
-/**
- * Renders tesseract output into a plain UTF-8 text string in WordStr format
- */
-class TESS_API TessWordStrBoxRenderer : public TessResultRenderer {
-public:
-  explicit TessWordStrBoxRenderer(const char *outputbase);
-
-protected:
-  bool AddImageHandler(TessBaseAPI *api) override;
-};
-
-#ifndef DISABLED_LEGACY_ENGINE
-
-/**
- * Renders tesseract output into an osd text string
- */
-class TESS_API TessOsdRenderer : public TessResultRenderer {
-public:
-  explicit TessOsdRenderer(const char *outputbase);
-
-protected:
-  bool AddImageHandler(TessBaseAPI *api) override;
-};
-
-#endif // ndef DISABLED_LEGACY_ENGINE
-
-} // namespace tesseract.
-
-#endif // TESSERACT_API_RENDERER_H_
--- a/third_party/ocr/tesseract-ocr/kylin/amd64/include/tesseract/resultiterator.h
+++ b/third_party/ocr/tesseract-ocr/kylin/amd64/include/tesseract/resultiterator.h
@ -1,250 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// File:        resultiterator.h
-// Description: Iterator for tesseract results that is capable of
-//              iterating in proper reading order over Bi Directional
-//              (e.g. mixed Hebrew and English) text.
-// Author:      David Eger
-//
-// (C) Copyright 2011, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef TESSERACT_CCMAIN_RESULT_ITERATOR_H_
-#define TESSERACT_CCMAIN_RESULT_ITERATOR_H_
-
-#include "export.h"            // for TESS_API, TESS_LOCAL
-#include "ltrresultiterator.h" // for LTRResultIterator
-#include "publictypes.h"       // for PageIteratorLevel
-#include "unichar.h"           // for StrongScriptDirection
-
-#include <set>    // for std::pair
-#include <vector> // for std::vector
-
-namespace tesseract {
-
-class TESS_API ResultIterator : public LTRResultIterator {
-public:
-  static ResultIterator *StartOfParagraph(const LTRResultIterator &resit);
-
-  /**
-   * ResultIterator is copy constructible!
-   * The default copy constructor works just fine for us.
-   */
-  ~ResultIterator() override = default;
-
-  // ============= Moving around within the page ============.
-  /**
-   * Moves the iterator to point to the start of the page to begin
-   * an iteration.
-   */
-  void Begin() override;
-
-  /**
-   * Moves to the start of the next object at the given level in the
-   * page hierarchy in the appropriate reading order and returns false if
-   * the end of the page was reached.
-   * NOTE that RIL_SYMBOL will skip non-text blocks, but all other
-   * PageIteratorLevel level values will visit each non-text block once.
-   * Think of non text blocks as containing a single para, with a single line,
-   * with a single imaginary word.
-   * Calls to Next with different levels may be freely intermixed.
-   * This function iterates words in right-to-left scripts correctly, if
-   * the appropriate language has been loaded into Tesseract.
-   */
-  bool Next(PageIteratorLevel level) override;
-
-  /**
-   * IsAtBeginningOf() returns whether we're at the logical beginning of the
-   * given level.  (as opposed to ResultIterator's left-to-right top-to-bottom
-   * order).  Otherwise, this acts the same as PageIterator::IsAtBeginningOf().
-   * For a full description, see pageiterator.h
-   */
-  bool IsAtBeginningOf(PageIteratorLevel level) const override;
-
-  /**
-   * Implement PageIterator's IsAtFinalElement correctly in a BiDi context.
-   * For instance, IsAtFinalElement(RIL_PARA, RIL_WORD) returns whether we
-   * point at the last word in a paragraph.  See PageIterator for full comment.
-   */
-  bool IsAtFinalElement(PageIteratorLevel level,
-                        PageIteratorLevel element) const override;
-
-  // ============= Functions that refer to words only ============.
-  // Returns the number of blanks before the current word.
-  int BlanksBeforeWord() const;
-
-  // ============= Accessing data ==============.
-
-  /**
-   * Returns the null terminated UTF-8 encoded text string for the current
-   * object at the given level. Use delete [] to free after use.
-   */
-  virtual char *GetUTF8Text(PageIteratorLevel level) const;
-
-  /**
-   * Returns the LSTM choices for every LSTM timestep for the current word.
-   */
-  virtual std::vector<std::vector<std::vector<std::pair<const char *, float>>>>
-      *GetRawLSTMTimesteps() const;
-  virtual std::vector<std::vector<std::pair<const char *, float>>>
-      *GetBestLSTMSymbolChoices() const;
-
-  /**
-   * Return whether the current paragraph's dominant reading direction
-   * is left-to-right (as opposed to right-to-left).
-   */
-  bool ParagraphIsLtr() const;
-
-  // ============= Exposed only for testing =============.
-
-  /**
-   * Yields the reading order as a sequence of indices and (optional)
-   * meta-marks for a set of words (given left-to-right).
-   * The meta marks are passed as negative values:
-   *   kMinorRunStart  Start of minor direction text.
-   *   kMinorRunEnd    End of minor direction text.
-   *   kComplexWord    The next indexed word contains both left-to-right and
-   *                    right-to-left characters and was treated as neutral.
-   *
-   * For example, suppose we have five words in a text line,
-   * indexed [0,1,2,3,4] from the leftmost side of the text line.
-   * The following are all believable reading_orders:
-   *
-   * Left-to-Right (in ltr paragraph):
-   *     { 0, 1, 2, 3, 4 }
-   * Left-to-Right (in rtl paragraph):
-   *     { kMinorRunStart, 0, 1, 2, 3, 4, kMinorRunEnd }
-   * Right-to-Left (in rtl paragraph):
-   *     { 4, 3, 2, 1, 0 }
-   * Left-to-Right except for an RTL phrase in words 2, 3 in an ltr paragraph:
-   *     { 0, 1, kMinorRunStart, 3, 2, kMinorRunEnd, 4 }
-   */
-  static void CalculateTextlineOrder(
-      bool paragraph_is_ltr,
-      const std::vector<StrongScriptDirection> &word_dirs,
-      std::vector<int> *reading_order);
-
-  static const int kMinorRunStart;
-  static const int kMinorRunEnd;
-  static const int kComplexWord;
-
-protected:
-  /**
-   * We presume the data associated with the given iterator will outlive us.
-   * NB: This is private because it does something that is non-obvious:
-   *   it resets to the beginning of the paragraph instead of staying wherever
-   *   resit might have pointed.
-   */
-  explicit ResultIterator(const LTRResultIterator &resit);
-
-private:
-  /**
-   * Calculates the current paragraph's dominant writing direction.
-   * Typically, members should use current_paragraph_ltr_ instead.
-   */
-  bool CurrentParagraphIsLtr() const;
-
-  /**
-   * Returns word indices as measured from resit->RestartRow() = index 0
-   * for the reading order of words within a textline given an iterator
-   * into the middle of the text line.
-   * In addition to non-negative word indices, the following negative values
-   * may be inserted:
-   *   kMinorRunStart  Start of minor direction text.
-   *   kMinorRunEnd    End of minor direction text.
-   *   kComplexWord    The previous word contains both left-to-right and
-   *                   right-to-left characters and was treated as neutral.
-   */
-  void CalculateTextlineOrder(bool paragraph_is_ltr,
-                              const LTRResultIterator &resit,
-                              std::vector<int> *indices) const;
-  /** Same as above, but the caller's ssd gets filled in if ssd != nullptr. */
-  void CalculateTextlineOrder(bool paragraph_is_ltr,
-                              const LTRResultIterator &resit,
-                              std::vector<StrongScriptDirection> *ssd,
-                              std::vector<int> *indices) const;
-
-  /**
-   * What is the index of the current word in a strict left-to-right reading
-   * of the row?
-   */
-  int LTRWordIndex() const;
-
-  /**
-   * Given an iterator pointing at a word, returns the logical reading order
-   * of blob indices for the word.
-   */
-  void CalculateBlobOrder(std::vector<int> *blob_indices) const;
-
-  /** Precondition: current_paragraph_is_ltr_ is set. */
-  void MoveToLogicalStartOfTextline();
-
-  /**
-   * Precondition: current_paragraph_is_ltr_ and in_minor_direction_
-   * are set.
-   */
-  void MoveToLogicalStartOfWord();
-
-  /** Are we pointing at the final (reading order) symbol of the word? */
-  bool IsAtFinalSymbolOfWord() const;
-
-  /** Are we pointing at the first (reading order) symbol of the word? */
-  bool IsAtFirstSymbolOfWord() const;
-
-  /**
-   * Append any extra marks that should be appended to this word when printed.
-   * Mostly, these are Unicode BiDi control characters.
-   */
-  void AppendSuffixMarks(std::string *text) const;
-
-  /** Appends the current word in reading order to the given buffer.*/
-  void AppendUTF8WordText(std::string *text) const;
-
-  /**
-   * Appends the text of the current text line, *assuming this iterator is
-   * positioned at the beginning of the text line*  This function
-   * updates the iterator to point to the first position past the text line.
-   * Each textline is terminated in a single newline character.
-   * If the textline ends a paragraph, it gets a second terminal newline.
-   */
-  void IterateAndAppendUTF8TextlineText(std::string *text);
-
-  /**
-   * Appends the text of the current paragraph in reading order
-   * to the given buffer.
-   * Each textline is terminated in a single newline character, and the
-   * paragraph gets an extra newline at the end.
-   */
-  void AppendUTF8ParagraphText(std::string *text) const;
-
-  /** Returns whether the bidi_debug flag is set to at least min_level. */
-  bool BidiDebug(int min_level) const;
-
-  bool current_paragraph_is_ltr_;
-
-  /**
-   * Is the currently pointed-at character at the beginning of
-   * a minor-direction run?
-   */
-  bool at_beginning_of_minor_run_;
-
-  /** Is the currently pointed-at character in a minor-direction sequence? */
-  bool in_minor_direction_;
-
-  /**
-   * Should detected inter-word spaces be preserved, or "compressed" to a single
-   * space character (default behavior).
-   */
-  bool preserve_interword_spaces_;
-};
-
-} // namespace tesseract.
-
-#endif // TESSERACT_CCMAIN_RESULT_ITERATOR_H_
--- a/third_party/ocr/tesseract-ocr/kylin/amd64/include/tesseract/unichar.h
+++ b/third_party/ocr/tesseract-ocr/kylin/amd64/include/tesseract/unichar.h
@ -1,174 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// File:        unichar.h
-// Description: Unicode character/ligature class.
-// Author:      Ray Smith
-//
-// (C) Copyright 2006, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef TESSERACT_CCUTIL_UNICHAR_H_
-#define TESSERACT_CCUTIL_UNICHAR_H_
-
-#include "export.h"
-
-#include <memory.h>
-#include <cstring>
-#include <string>
-#include <vector>
-
-namespace tesseract {
-
-// Maximum number of characters that can be stored in a UNICHAR. Must be
-// at least 4. Must not exceed 31 without changing the coding of length.
-#define UNICHAR_LEN 30
-
-// A UNICHAR_ID is the unique id of a unichar.
-using UNICHAR_ID = int;
-
-// A variable to indicate an invalid or uninitialized unichar id.
-static const int INVALID_UNICHAR_ID = -1;
-// A special unichar that corresponds to INVALID_UNICHAR_ID.
-static const char INVALID_UNICHAR[] = "__INVALID_UNICHAR__";
-
-enum StrongScriptDirection {
-  DIR_NEUTRAL = 0,       // Text contains only neutral characters.
-  DIR_LEFT_TO_RIGHT = 1, // Text contains no Right-to-Left characters.
-  DIR_RIGHT_TO_LEFT = 2, // Text contains no Left-to-Right characters.
-  DIR_MIX = 3,           // Text contains a mixture of left-to-right
-                         // and right-to-left characters.
-};
-
-using char32 = signed int;
-
-// The UNICHAR class holds a single classification result. This may be
-// a single Unicode character (stored as between 1 and 4 utf8 bytes) or
-// multiple Unicode characters representing the NFKC expansion of a ligature
-// such as fi, ffl etc. These are also stored as utf8.
-class TESS_API UNICHAR {
-public:
-  UNICHAR() {
-    memset(chars, 0, UNICHAR_LEN);
-  }
-
-  // Construct from a utf8 string. If len<0 then the string is null terminated.
-  // If the string is too long to fit in the UNICHAR then it takes only what
-  // will fit.
-  UNICHAR(const char *utf8_str, int len);
-
-  // Construct from a single UCS4 character.
-  explicit UNICHAR(int unicode);
-
-  // Default copy constructor and operator= are OK.
-
-  // Get the first character as UCS-4.
-  int first_uni() const;
-
-  // Get the length of the UTF8 string.
-  int utf8_len() const {
-    int len = chars[UNICHAR_LEN - 1];
-    return len >= 0 && len < UNICHAR_LEN ? len : UNICHAR_LEN;
-  }
-
-  // Get a UTF8 string, but NOT nullptr terminated.
-  const char *utf8() const {
-    return chars;
-  }
-
-  // Get a terminated UTF8 string: Must delete[] it after use.
-  char *utf8_str() const;
-
-  // Get the number of bytes in the first character of the given utf8 string.
-  static int utf8_step(const char *utf8_str);
-
-  // A class to simplify iterating over and accessing elements of a UTF8
-  // string. Note that unlike the UNICHAR class, const_iterator does NOT COPY or
-  // take ownership of the underlying byte array. It also does not permit
-  // modification of the array (as the name suggests).
-  //
-  // Example:
-  //   for (UNICHAR::const_iterator it = UNICHAR::begin(str, str_len);
-  //        it != UNICHAR::end(str, len);
-  //        ++it) {
-  //     printf("UCS-4 symbol code = %d\n", *it);
-  //     char buf[5];
-  //     int char_len = it.get_utf8(buf); buf[char_len] = '\0';
-  //     printf("Char = %s\n", buf);
-  //   }
-  class TESS_API const_iterator {
-    using CI = const_iterator;
-
-  public:
-    // Step to the next UTF8 character.
-    // If the current position is at an illegal UTF8 character, then print an
-    // error message and step by one byte. If the current position is at a
-    // nullptr value, don't step past it.
-    const_iterator &operator++();
-
-    // Return the UCS-4 value at the current position.
-    // If the current position is at an illegal UTF8 value, return a single
-    // space character.
-    int operator*() const;
-
-    // Store the UTF-8 encoding of the current codepoint into buf, which must be
-    // at least 4 bytes long. Return the number of bytes written.
-    // If the current position is at an illegal UTF8 value, writes a single
-    // space character and returns 1.
-    // Note that this method does not null-terminate the buffer.
-    int get_utf8(char *buf) const;
-    // Returns the number of bytes of the current codepoint. Returns 1 if the
-    // current position is at an illegal UTF8 value.
-    int utf8_len() const;
-    // Returns true if the UTF-8 encoding at the current position is legal.
-    bool is_legal() const;
-
-    // Return the pointer into the string at the current position.
-    const char *utf8_data() const {
-      return it_;
-    }
-
-    // Iterator equality operators.
-    friend bool operator==(const CI &lhs, const CI &rhs) {
-      return lhs.it_ == rhs.it_;
-    }
-    friend bool operator!=(const CI &lhs, const CI &rhs) {
-      return !(lhs == rhs);
-    }
-
-  private:
-    friend class UNICHAR;
-    explicit const_iterator(const char *it) : it_(it) {}
-
-    const char *it_; // Pointer into the string.
-  };
-
-  // Create a start/end iterator pointing to a string. Note that these methods
-  // are static and do NOT create a copy or take ownership of the underlying
-  // array.
-  static const_iterator begin(const char *utf8_str, int byte_length);
-  static const_iterator end(const char *utf8_str, int byte_length);
-
-  // Converts a utf-8 string to a vector of unicodes.
-  // Returns an empty vector if the input contains invalid UTF-8.
-  static std::vector<char32> UTF8ToUTF32(const char *utf8_str);
-  // Converts a vector of unicodes to a utf8 string.
-  // Returns an empty string if the input contains an invalid unicode.
-  static std::string UTF32ToUTF8(const std::vector<char32> &str32);
-
-private:
-  // A UTF-8 representation of 1 or more Unicode characters.
-  // The last element (chars[UNICHAR_LEN - 1]) is a length if
-  // its value < UNICHAR_LEN, otherwise it is a genuine character.
-  char chars[UNICHAR_LEN]{};
-};
-
-} // namespace tesseract
-
-#endif // TESSERACT_CCUTIL_UNICHAR_H_
--- a/third_party/ocr/tesseract-ocr/kylin/amd64/include/tesseract/version.h
+++ b/third_party/ocr/tesseract-ocr/kylin/amd64/include/tesseract/version.h
@ -1,34 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// File:        version.h
-// Description: Version information
-//
-// (C) Copyright 2018, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef TESSERACT_API_VERSION_H_
-#define TESSERACT_API_VERSION_H_
-
-// clang-format off
-
-#define TESSERACT_MAJOR_VERSION @GENERIC_MAJOR_VERSION@
-#define TESSERACT_MINOR_VERSION @GENERIC_MINOR_VERSION@
-#define TESSERACT_MICRO_VERSION @GENERIC_MICRO_VERSION@
-
-#define TESSERACT_VERSION          \
-  (TESSERACT_MAJOR_VERSION << 16 | \
-   TESSERACT_MINOR_VERSION <<  8 | \
-   TESSERACT_MICRO_VERSION)
-
-#define TESSERACT_VERSION_STR "@PACKAGE_VERSION@"
-
-// clang-format on
-
-#endif // TESSERACT_API_VERSION_H_
--- a/third_party/ocr/tesseract-ocr/kylin/loongarch64/include/tesseract/baseapi.h
+++ b/third_party/ocr/tesseract-ocr/kylin/loongarch64/include/tesseract/baseapi.h
@ -1,812 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// File:        baseapi.h
-// Description: Simple API for calling tesseract.
-// Author:      Ray Smith
-//
-// (C) Copyright 2006, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef TESSERACT_API_BASEAPI_H_
-#define TESSERACT_API_BASEAPI_H_
-
-#ifdef HAVE_CONFIG_H
-#  include "config_auto.h" // DISABLED_LEGACY_ENGINE
-#endif
-
-#include "export.h"
-#include "pageiterator.h"
-#include "publictypes.h"
-#include "resultiterator.h"
-#include "unichar.h"
-
-#include "version.h"
-
-#include <cstdio>
-#include <vector> // for std::vector
-
-struct Pix;
-struct Pixa;
-struct Boxa;
-
-namespace tesseract {
-
-class PAGE_RES;
-class ParagraphModel;
-class BLOCK_LIST;
-class ETEXT_DESC;
-struct OSResults;
-class UNICHARSET;
-
-class Dawg;
-class Dict;
-class EquationDetect;
-class PageIterator;
-class ImageThresholder;
-class LTRResultIterator;
-class ResultIterator;
-class MutableIterator;
-class TessResultRenderer;
-class Tesseract;
-
-// Function to read a std::vector<char> from a whole file.
-// Returns false on failure.
-using FileReader = bool (*)(const char *filename, std::vector<char> *data);
-
-using DictFunc = int (Dict::*)(void *, const UNICHARSET &, UNICHAR_ID,
-                               bool) const;
-using ProbabilityInContextFunc = double (Dict::*)(const char *, const char *,
-                                                  int, const char *, int);
-
-/**
- * Base class for all tesseract APIs.
- * Specific classes can add ability to work on different inputs or produce
- * different outputs.
- * This class is mostly an interface layer on top of the Tesseract instance
- * class to hide the data types so that users of this class don't have to
- * include any other Tesseract headers.
- */
-class TESS_API TessBaseAPI {
-public:
-  TessBaseAPI();
-  virtual ~TessBaseAPI();
-  // Copy constructor and assignment operator are currently unsupported.
-  TessBaseAPI(TessBaseAPI const &) = delete;
-  TessBaseAPI &operator=(TessBaseAPI const &) = delete;
-
-  /**
-   * Returns the version identifier as a static string. Do not delete.
-   */
-  static const char *Version();
-
-  /**
-   * If compiled with OpenCL AND an available OpenCL
-   * device is deemed faster than serial code, then
-   * "device" is populated with the cl_device_id
-   * and returns sizeof(cl_device_id)
-   * otherwise *device=nullptr and returns 0.
-   */
-  static size_t getOpenCLDevice(void **device);
-
-  /**
-   * Set the name of the input file. Needed for training and
-   * reading a UNLV zone file, and for searchable PDF output.
-   */
-  void SetInputName(const char *name);
-  /**
-   * These functions are required for searchable PDF output.
-   * We need our hands on the input file so that we can include
-   * it in the PDF without transcoding. If that is not possible,
-   * we need the original image. Finally, resolution metadata
-   * is stored in the PDF so we need that as well.
-   */
-  const char *GetInputName();
-  // Takes ownership of the input pix.
-  void SetInputImage(Pix *pix);
-  Pix *GetInputImage();
-  int GetSourceYResolution();
-  const char *GetDatapath();
-
-  /** Set the name of the bonus output files. Needed only for debugging. */
-  void SetOutputName(const char *name);
-
-  /**
-   * Set the value of an internal "parameter."
-   * Supply the name of the parameter and the value as a string, just as
-   * you would in a config file.
-   * Returns false if the name lookup failed.
-   * Eg SetVariable("tessedit_char_blacklist", "xyz"); to ignore x, y and z.
-   * Or SetVariable("classify_bln_numeric_mode", "1"); to set numeric-only mode.
-   * SetVariable may be used before Init, but settings will revert to
-   * defaults on End().
-   *
-   * Note: Must be called after Init(). Only works for non-init variables
-   * (init variables should be passed to Init()).
-   */
-  bool SetVariable(const char *name, const char *value);
-  bool SetDebugVariable(const char *name, const char *value);
-
-  /**
-   * Returns true if the parameter was found among Tesseract parameters.
-   * Fills in value with the value of the parameter.
-   */
-  bool GetIntVariable(const char *name, int *value) const;
-  bool GetBoolVariable(const char *name, bool *value) const;
-  bool GetDoubleVariable(const char *name, double *value) const;
-
-  /**
-   * Returns the pointer to the string that represents the value of the
-   * parameter if it was found among Tesseract parameters.
-   */
-  const char *GetStringVariable(const char *name) const;
-
-#ifndef DISABLED_LEGACY_ENGINE
-
-  /**
-   * Print Tesseract fonts table to the given file.
-   */
-  void PrintFontsTable(FILE *fp) const;
-
-#endif
-
-  /**
-   * Print Tesseract parameters to the given file.
-   */
-  void PrintVariables(FILE *fp) const;
-
-  /**
-   * Get value of named variable as a string, if it exists.
-   */
-  bool GetVariableAsString(const char *name, std::string *val) const;
-
-  /**
-   * Instances are now mostly thread-safe and totally independent,
-   * but some global parameters remain. Basically it is safe to use multiple
-   * TessBaseAPIs in different threads in parallel, UNLESS:
-   * you use SetVariable on some of the Params in classify and textord.
-   * If you do, then the effect will be to change it for all your instances.
-   *
-   * Start tesseract. Returns zero on success and -1 on failure.
-   * NOTE that the only members that may be called before Init are those
-   * listed above here in the class definition.
-   *
-   * The datapath must be the name of the tessdata directory.
-   * The language is (usually) an ISO 639-3 string or nullptr will default to
-   * eng. It is entirely safe (and eventually will be efficient too) to call
-   * Init multiple times on the same instance to change language, or just
-   * to reset the classifier.
-   * The language may be a string of the form [~]<lang>[+[~]<lang>]* indicating
-   * that multiple languages are to be loaded. Eg hin+eng will load Hindi and
-   * English. Languages may specify internally that they want to be loaded
-   * with one or more other languages, so the ~ sign is available to override
-   * that. Eg if hin were set to load eng by default, then hin+~eng would force
-   * loading only hin. The number of loaded languages is limited only by
-   * memory, with the caveat that loading additional languages will impact
-   * both speed and accuracy, as there is more work to do to decide on the
-   * applicable language, and there is more chance of hallucinating incorrect
-   * words.
-   * WARNING: On changing languages, all Tesseract parameters are reset
-   * back to their default values. (Which may vary between languages.)
-   * If you have a rare need to set a Variable that controls
-   * initialization for a second call to Init you should explicitly
-   * call End() and then use SetVariable before Init. This is only a very
-   * rare use case, since there are very few uses that require any parameters
-   * to be set before Init.
-   *
-   * If set_only_non_debug_params is true, only params that do not contain
-   * "debug" in the name will be set.
-   */
-  int Init(const char *datapath, const char *language, OcrEngineMode mode,
-           char **configs, int configs_size,
-           const std::vector<std::string> *vars_vec,
-           const std::vector<std::string> *vars_values,
-           bool set_only_non_debug_params);
-  int Init(const char *datapath, const char *language, OcrEngineMode oem) {
-    return Init(datapath, language, oem, nullptr, 0, nullptr, nullptr, false);
-  }
-  int Init(const char *datapath, const char *language) {
-    return Init(datapath, language, OEM_DEFAULT, nullptr, 0, nullptr, nullptr,
-                false);
-  }
-  // In-memory version reads the traineddata file directly from the given
-  // data[data_size] array, and/or reads data via a FileReader.
-  int Init(const char *data, int data_size, const char *language,
-           OcrEngineMode mode, char **configs, int configs_size,
-           const std::vector<std::string> *vars_vec,
-           const std::vector<std::string> *vars_values,
-           bool set_only_non_debug_params, FileReader reader);
-
-  /**
-   * Returns the languages string used in the last valid initialization.
-   * If the last initialization specified "deu+hin" then that will be
-   * returned. If hin loaded eng automatically as well, then that will
-   * not be included in this list. To find the languages actually
-   * loaded use GetLoadedLanguagesAsVector.
-   * The returned string should NOT be deleted.
-   */
-  const char *GetInitLanguagesAsString() const;
-
-  /**
-   * Returns the loaded languages in the vector of std::string.
-   * Includes all languages loaded by the last Init, including those loaded
-   * as dependencies of other loaded languages.
-   */
-  void GetLoadedLanguagesAsVector(std::vector<std::string> *langs) const;
-
-  /**
-   * Returns the available languages in the sorted vector of std::string.
-   */
-  void GetAvailableLanguagesAsVector(std::vector<std::string> *langs) const;
-
-  /**
-   * Init only for page layout analysis. Use only for calls to SetImage and
-   * AnalysePage. Calls that attempt recognition will generate an error.
-   */
-  void InitForAnalysePage();
-
-  /**
-   * Read a "config" file containing a set of param, value pairs.
-   * Searches the standard places: tessdata/configs, tessdata/tessconfigs
-   * and also accepts a relative or absolute path name.
-   * Note: only non-init params will be set (init params are set by Init()).
-   */
-  void ReadConfigFile(const char *filename);
-  /** Same as above, but only set debug params from the given config file. */
-  void ReadDebugConfigFile(const char *filename);
-
-  /**
-   * Set the current page segmentation mode. Defaults to PSM_SINGLE_BLOCK.
-   * The mode is stored as an IntParam so it can also be modified by
-   * ReadConfigFile or SetVariable("tessedit_pageseg_mode", mode as string).
-   */
-  void SetPageSegMode(PageSegMode mode);
-
-  /** Return the current page segmentation mode. */
-  PageSegMode GetPageSegMode() const;
-
-  /**
-   * Recognize a rectangle from an image and return the result as a string.
-   * May be called many times for a single Init.
-   * Currently has no error checking.
-   * Greyscale of 8 and color of 24 or 32 bits per pixel may be given.
-   * Palette color images will not work properly and must be converted to
-   * 24 bit.
-   * Binary images of 1 bit per pixel may also be given but they must be
-   * byte packed with the MSB of the first byte being the first pixel, and a
-   * 1 represents WHITE. For binary images set bytes_per_pixel=0.
-   * The recognized text is returned as a char* which is coded
-   * as UTF8 and must be freed with the delete [] operator.
-   *
-   * Note that TesseractRect is the simplified convenience interface.
-   * For advanced uses, use SetImage, (optionally) SetRectangle, Recognize,
-   * and one or more of the Get*Text functions below.
-   */
-  char *TesseractRect(const unsigned char *imagedata, int bytes_per_pixel,
-                      int bytes_per_line, int left, int top, int width,
-                      int height);
-
-  /**
-   * Call between pages or documents etc to free up memory and forget
-   * adaptive data.
-   */
-  void ClearAdaptiveClassifier();
-
-  /**
-   * @defgroup AdvancedAPI Advanced API
-   * The following methods break TesseractRect into pieces, so you can
-   * get hold of the thresholded image, get the text in different formats,
-   * get bounding boxes, confidences etc.
-   */
-  /* @{ */
-
-  /**
-   * Provide an image for Tesseract to recognize. Format is as
-   * TesseractRect above. Copies the image buffer and converts to Pix.
-   * SetImage clears all recognition results, and sets the rectangle to the
-   * full image, so it may be followed immediately by a GetUTF8Text, and it
-   * will automatically perform recognition.
-   */
-  void SetImage(const unsigned char *imagedata, int width, int height,
-                int bytes_per_pixel, int bytes_per_line);
-
-  /**
-   * Provide an image for Tesseract to recognize. As with SetImage above,
-   * Tesseract takes its own copy of the image, so it need not persist until
-   * after Recognize.
-   * Pix vs raw, which to use?
-   * Use Pix where possible. Tesseract uses Pix as its internal representation
-   * and it is therefore more efficient to provide a Pix directly.
-   */
-  void SetImage(Pix *pix);
-
-  /**
-   * Set the resolution of the source image in pixels per inch so font size
-   * information can be calculated in results.  Call this after SetImage().
-   */
-  void SetSourceResolution(int ppi);
-
-  /**
-   * Restrict recognition to a sub-rectangle of the image. Call after SetImage.
-   * Each SetRectangle clears the recognition results so multiple rectangles
-   * can be recognized with the same image.
-   */
-  void SetRectangle(int left, int top, int width, int height);
-
-  /**
-   * Get a copy of the internal thresholded image from Tesseract.
-   * Caller takes ownership of the Pix and must pixDestroy it.
-   * May be called any time after SetImage, or after TesseractRect.
-   */
-  Pix *GetThresholdedImage();
-
-  /**
-   * Get the result of page layout analysis as a leptonica-style
-   * Boxa, Pixa pair, in reading order.
-   * Can be called before or after Recognize.
-   */
-  Boxa *GetRegions(Pixa **pixa);
-
-  /**
-   * Get the textlines as a leptonica-style
-   * Boxa, Pixa pair, in reading order.
-   * Can be called before or after Recognize.
-   * If raw_image is true, then extract from the original image instead of the
-   * thresholded image and pad by raw_padding pixels.
-   * If blockids is not nullptr, the block-id of each line is also returned as
-   * an array of one element per line. delete [] after use. If paraids is not
-   * nullptr, the paragraph-id of each line within its block is also returned as
-   * an array of one element per line. delete [] after use.
-   */
-  Boxa *GetTextlines(bool raw_image, int raw_padding, Pixa **pixa,
-                     int **blockids, int **paraids);
-  /*
-   Helper method to extract from the thresholded image. (most common usage)
-*/
-  Boxa *GetTextlines(Pixa **pixa, int **blockids) {
-    return GetTextlines(false, 0, pixa, blockids, nullptr);
-  }
-
-  /**
-   * Get textlines and strips of image regions as a leptonica-style Boxa, Pixa
-   * pair, in reading order. Enables downstream handling of non-rectangular
-   * regions.
-   * Can be called before or after Recognize.
-   * If blockids is not nullptr, the block-id of each line is also returned as
-   * an array of one element per line. delete [] after use.
-   */
-  Boxa *GetStrips(Pixa **pixa, int **blockids);
-
-  /**
-   * Get the words as a leptonica-style
-   * Boxa, Pixa pair, in reading order.
-   * Can be called before or after Recognize.
-   */
-  Boxa *GetWords(Pixa **pixa);
-
-  /**
-   * Gets the individual connected (text) components (created
-   * after pages segmentation step, but before recognition)
-   * as a leptonica-style Boxa, Pixa pair, in reading order.
-   * Can be called before or after Recognize.
-   * Note: the caller is responsible for calling boxaDestroy()
-   * on the returned Boxa array and pixaDestroy() on cc array.
-   */
-  Boxa *GetConnectedComponents(Pixa **cc);
-
-  /**
-   * Get the given level kind of components (block, textline, word etc.) as a
-   * leptonica-style Boxa, Pixa pair, in reading order.
-   * Can be called before or after Recognize.
-   * If blockids is not nullptr, the block-id of each component is also returned
-   * as an array of one element per component. delete [] after use.
-   * If paraids is not nullptr, the paragraph-id of each component within its
-   * block is also returned as an array of one element per component. delete []
-   * after use. If raw_image is true, then portions of the original image are
-   * extracted instead of the thresholded image and padded with raw_padding. If
-   * text_only is true, then only text components are returned.
-   */
-  Boxa *GetComponentImages(PageIteratorLevel level, bool text_only,
-                           bool raw_image, int raw_padding, Pixa **pixa,
-                           int **blockids, int **paraids);
-  // Helper function to get binary images with no padding (most common usage).
-  Boxa *GetComponentImages(const PageIteratorLevel level, const bool text_only,
-                           Pixa **pixa, int **blockids) {
-    return GetComponentImages(level, text_only, false, 0, pixa, blockids,
-                              nullptr);
-  }
-
-  /**
-   * Returns the scale factor of the thresholded image that would be returned by
-   * GetThresholdedImage() and the various GetX() methods that call
-   * GetComponentImages().
-   * Returns 0 if no thresholder has been set.
-   */
-  int GetThresholdedImageScaleFactor() const;
-
-  /**
-   * Runs page layout analysis in the mode set by SetPageSegMode.
-   * May optionally be called prior to Recognize to get access to just
-   * the page layout results. Returns an iterator to the results.
-   * If merge_similar_words is true, words are combined where suitable for use
-   * with a line recognizer. Use if you want to use AnalyseLayout to find the
-   * textlines, and then want to process textline fragments with an external
-   * line recognizer.
-   * Returns nullptr on error or an empty page.
-   * The returned iterator must be deleted after use.
-   * WARNING! This class points to data held within the TessBaseAPI class, and
-   * therefore can only be used while the TessBaseAPI class still exists and
-   * has not been subjected to a call of Init, SetImage, Recognize, Clear, End,
-   * DetectOS, or anything else that changes the internal PAGE_RES.
-   */
-  PageIterator *AnalyseLayout();
-  PageIterator *AnalyseLayout(bool merge_similar_words);
-
-  /**
-   * Recognize the image from SetAndThresholdImage, generating Tesseract
-   * internal structures. Returns 0 on success.
-   * Optional. The Get*Text functions below will call Recognize if needed.
-   * After Recognize, the output is kept internally until the next SetImage.
-   */
-  int Recognize(ETEXT_DESC *monitor);
-
-  /**
-   * Methods to retrieve information after SetAndThresholdImage(),
-   * Recognize() or TesseractRect(). (Recognize is called implicitly if needed.)
-   */
-
-  /**
-   * Turns images into symbolic text.
-   *
-   * filename can point to a single image, a multi-page TIFF,
-   * or a plain text list of image filenames.
-   *
-   * retry_config is useful for debugging. If not nullptr, you can fall
-   * back to an alternate configuration if a page fails for some
-   * reason.
-   *
-   * timeout_millisec terminates processing if any single page
-   * takes too long. Set to 0 for unlimited time.
-   *
-   * renderer is responsible for creating the output. For example,
-   * use the TessTextRenderer if you want plaintext output, or
-   * the TessPDFRenderer to produce searchable PDF.
-   *
-   * If tessedit_page_number is non-negative, will only process that
-   * single page. Works for multi-page tiff file, or filelist.
-   *
-   * Returns true if successful, false on error.
-   */
-  bool ProcessPages(const char *filename, const char *retry_config,
-                    int timeout_millisec, TessResultRenderer *renderer);
-  // Does the real work of ProcessPages.
-  bool ProcessPagesInternal(const char *filename, const char *retry_config,
-                            int timeout_millisec, TessResultRenderer *renderer);
-
-  /**
-   * Turn a single image into symbolic text.
-   *
-   * The pix is the image processed. filename and page_index are
-   * metadata used by side-effect processes, such as reading a box
-   * file or formatting as hOCR.
-   *
-   * See ProcessPages for descriptions of other parameters.
-   */
-  bool ProcessPage(Pix *pix, int page_index, const char *filename,
-                   const char *retry_config, int timeout_millisec,
-                   TessResultRenderer *renderer);
-
-  /**
-   * Get a reading-order iterator to the results of LayoutAnalysis and/or
-   * Recognize. The returned iterator must be deleted after use.
-   * WARNING! This class points to data held within the TessBaseAPI class, and
-   * therefore can only be used while the TessBaseAPI class still exists and
-   * has not been subjected to a call of Init, SetImage, Recognize, Clear, End,
-   * DetectOS, or anything else that changes the internal PAGE_RES.
-   */
-  ResultIterator *GetIterator();
-
-  /**
-   * Get a mutable iterator to the results of LayoutAnalysis and/or Recognize.
-   * The returned iterator must be deleted after use.
-   * WARNING! This class points to data held within the TessBaseAPI class, and
-   * therefore can only be used while the TessBaseAPI class still exists and
-   * has not been subjected to a call of Init, SetImage, Recognize, Clear, End,
-   * DetectOS, or anything else that changes the internal PAGE_RES.
-   */
-  MutableIterator *GetMutableIterator();
-
-  /**
-   * The recognized text is returned as a char* which is coded
-   * as UTF8 and must be freed with the delete [] operator.
-   */
-  char *GetUTF8Text();
-
-  /**
-   * Make a HTML-formatted string with hOCR markup from the internal
-   * data structures.
-   * page_number is 0-based but will appear in the output as 1-based.
-   * monitor can be used to
-   *  cancel the recognition
-   *  receive progress callbacks
-   * Returned string must be freed with the delete [] operator.
-   */
-  char *GetHOCRText(ETEXT_DESC *monitor, int page_number);
-
-  /**
-   * Make a HTML-formatted string with hOCR markup from the internal
-   * data structures.
-   * page_number is 0-based but will appear in the output as 1-based.
-   * Returned string must be freed with the delete [] operator.
-   */
-  char *GetHOCRText(int page_number);
-
-  /**
-   * Make an XML-formatted string with Alto markup from the internal
-   * data structures.
-   */
-  char *GetAltoText(ETEXT_DESC *monitor, int page_number);
-
-  /**
-   * Make an XML-formatted string with Alto markup from the internal
-   * data structures.
-   */
-  char *GetAltoText(int page_number);
-
-  /**
-   * Make a TSV-formatted string from the internal data structures.
-   * page_number is 0-based but will appear in the output as 1-based.
-   * Returned string must be freed with the delete [] operator.
-   */
-  char *GetTSVText(int page_number);
-
-  /**
-   * Make a box file for LSTM training from the internal data structures.
-   * Constructs coordinates in the original image - not just the rectangle.
-   * page_number is a 0-based page index that will appear in the box file.
-   * Returned string must be freed with the delete [] operator.
-   */
-  char *GetLSTMBoxText(int page_number);
-
-  /**
-   * The recognized text is returned as a char* which is coded in the same
-   * format as a box file used in training.
-   * Constructs coordinates in the original image - not just the rectangle.
-   * page_number is a 0-based page index that will appear in the box file.
-   * Returned string must be freed with the delete [] operator.
-   */
-  char *GetBoxText(int page_number);
-
-  /**
-   * The recognized text is returned as a char* which is coded in the same
-   * format as a WordStr box file used in training.
-   * page_number is a 0-based page index that will appear in the box file.
-   * Returned string must be freed with the delete [] operator.
-   */
-  char *GetWordStrBoxText(int page_number);
-
-  /**
-   * The recognized text is returned as a char* which is coded
-   * as UNLV format Latin-1 with specific reject and suspect codes.
-   * Returned string must be freed with the delete [] operator.
-   */
-  char *GetUNLVText();
-
-  /**
-   * Detect the orientation of the input image and apparent script (alphabet).
-   * orient_deg is the detected clockwise rotation of the input image in degrees
-   * (0, 90, 180, 270)
-   * orient_conf is the confidence (15.0 is reasonably confident)
-   * script_name is an ASCII string, the name of the script, e.g. "Latin"
-   * script_conf is confidence level in the script
-   * Returns true on success and writes values to each parameter as an output
-   */
-  bool DetectOrientationScript(int *orient_deg, float *orient_conf,
-                               const char **script_name, float *script_conf);
-
-  /**
-   * The recognized text is returned as a char* which is coded
-   * as UTF8 and must be freed with the delete [] operator.
-   * page_number is a 0-based page index that will appear in the osd file.
-   */
-  char *GetOsdText(int page_number);
-
-  /** Returns the (average) confidence value between 0 and 100. */
-  int MeanTextConf();
-  /**
-   * Returns all word confidences (between 0 and 100) in an array, terminated
-   * by -1.  The calling function must delete [] after use.
-   * The number of confidences should correspond to the number of space-
-   * delimited words in GetUTF8Text.
-   */
-  int *AllWordConfidences();
-
-#ifndef DISABLED_LEGACY_ENGINE
-  /**
-   * Applies the given word to the adaptive classifier if possible.
-   * The word must be SPACE-DELIMITED UTF-8 - l i k e t h i s , so it can
-   * tell the boundaries of the graphemes.
-   * Assumes that SetImage/SetRectangle have been used to set the image
-   * to the given word. The mode arg should be PSM_SINGLE_WORD or
-   * PSM_CIRCLE_WORD, as that will be used to control layout analysis.
-   * The currently set PageSegMode is preserved.
-   * Returns false if adaption was not possible for some reason.
-   */
-  bool AdaptToWordStr(PageSegMode mode, const char *wordstr);
-#endif //  ndef DISABLED_LEGACY_ENGINE
-
-  /**
-   * Free up recognition results and any stored image data, without actually
-   * freeing any recognition data that would be time-consuming to reload.
-   * Afterwards, you must call SetImage or TesseractRect before doing
-   * any Recognize or Get* operation.
-   */
-  void Clear();
-
-  /**
-   * Close down tesseract and free up all memory. End() is equivalent to
-   * destructing and reconstructing your TessBaseAPI.
-   * Once End() has been used, none of the other API functions may be used
-   * other than Init and anything declared above it in the class definition.
-   */
-  void End();
-
-  /**
-   * Clear any library-level memory caches.
-   * There are a variety of expensive-to-load constant data structures (mostly
-   * language dictionaries) that are cached globally -- surviving the Init()
-   * and End() of individual TessBaseAPI's.  This function allows the clearing
-   * of these caches.
-   **/
-  static void ClearPersistentCache();
-
-  /**
-   * Check whether a word is valid according to Tesseract's language model
-   * @return 0 if the word is invalid, non-zero if valid.
-   * @warning temporary! This function will be removed from here and placed
-   * in a separate API at some future time.
-   */
-  int IsValidWord(const char *word) const;
-  // Returns true if utf8_character is defined in the UniCharset.
-  bool IsValidCharacter(const char *utf8_character) const;
-
-  bool GetTextDirection(int *out_offset, float *out_slope);
-
-  /** Sets Dict::letter_is_okay_ function to point to the given function. */
-  void SetDictFunc(DictFunc f);
-
-  /** Sets Dict::probability_in_context_ function to point to the given
-   * function.
-   */
-  void SetProbabilityInContextFunc(ProbabilityInContextFunc f);
-
-  /**
-   * Estimates the Orientation And Script of the image.
-   * @return true if the image was processed successfully.
-   */
-  bool DetectOS(OSResults *);
-
-  /**
-   * Return text orientation of each block as determined by an earlier run
-   * of layout analysis.
-   */
-  void GetBlockTextOrientations(int **block_orientation,
-                                bool **vertical_writing);
-
-  /** This method returns the string form of the specified unichar. */
-  const char *GetUnichar(int unichar_id) const;
-
-  /** Return the pointer to the i-th dawg loaded into tesseract_ object. */
-  const Dawg *GetDawg(int i) const;
-
-  /** Return the number of dawgs loaded into tesseract_ object. */
-  int NumDawgs() const;
-
-  Tesseract *tesseract() const {
-    return tesseract_;
-  }
-
-  OcrEngineMode oem() const {
-    return last_oem_requested_;
-  }
-
-  void set_min_orientation_margin(double margin);
-  /* @} */
-
-protected:
-  /** Common code for setting the image. Returns true if Init has been called.
-   */
-  bool InternalSetImage();
-
-  /**
-   * Run the thresholder to make the thresholded image. If pix is not nullptr,
-   * the source is thresholded to pix instead of the internal IMAGE.
-   */
-  virtual bool Threshold(Pix **pix);
-
-  /**
-   * Find lines from the image making the BLOCK_LIST.
-   * @return 0 on success.
-   */
-  int FindLines();
-
-  /** Delete the pageres and block list ready for a new page. */
-  void ClearResults();
-
-  /**
-   * Return an LTR Result Iterator -- used only for training, as we really want
-   * to ignore all BiDi smarts at that point.
-   * delete once you're done with it.
-   */
-  LTRResultIterator *GetLTRIterator();
-
-  /**
-   * Return the length of the output text string, as UTF8, assuming
-   * one newline per line and one per block, with a terminator,
-   * and assuming a single character reject marker for each rejected character.
-   * Also return the number of recognized blobs in blob_count.
-   */
-  int TextLength(int *blob_count) const;
-
-  //// paragraphs.cpp ////////////////////////////////////////////////////
-  void DetectParagraphs(bool after_text_recognition);
-
-  const PAGE_RES *GetPageRes() const {
-    return page_res_;
-  }
-
-protected:
-  Tesseract *tesseract_;          ///< The underlying data object.
-  Tesseract *osd_tesseract_;      ///< For orientation & script detection.
-  EquationDetect *equ_detect_;    ///< The equation detector.
-  FileReader reader_;             ///< Reads files from any filesystem.
-  ImageThresholder *thresholder_; ///< Image thresholding module.
-  std::vector<ParagraphModel *> *paragraph_models_;
-  BLOCK_LIST *block_list_;           ///< The page layout.
-  PAGE_RES *page_res_;               ///< The page-level data.
-  std::string input_file_;           ///< Name used by training code.
-  std::string output_file_;          ///< Name used by debug code.
-  std::string datapath_;             ///< Current location of tessdata.
-  std::string language_;             ///< Last initialized language.
-  OcrEngineMode last_oem_requested_; ///< Last ocr language mode requested.
-  bool recognition_done_;            ///< page_res_ contains recognition data.
-
-  /**
-   * @defgroup ThresholderParams Thresholder Parameters
-   * Parameters saved from the Thresholder. Needed to rebuild coordinates.
-   */
-  /* @{ */
-  int rect_left_;
-  int rect_top_;
-  int rect_width_;
-  int rect_height_;
-  int image_width_;
-  int image_height_;
-  /* @} */
-
-private:
-  // A list of image filenames gets special consideration
-  bool ProcessPagesFileList(FILE *fp, std::string *buf,
-                            const char *retry_config, int timeout_millisec,
-                            TessResultRenderer *renderer,
-                            int tessedit_page_number);
-  // TIFF supports multipage so gets special consideration.
-  bool ProcessPagesMultipageTiff(const unsigned char *data, size_t size,
-                                 const char *filename, const char *retry_config,
-                                 int timeout_millisec,
-                                 TessResultRenderer *renderer,
-                                 int tessedit_page_number);
-}; // class TessBaseAPI.
-
-/** Escape a char string - replace &<>"' with HTML codes. */
-std::string HOcrEscape(const char *text);
-
-} // namespace tesseract
-
-#endif // TESSERACT_API_BASEAPI_H_
--- a/third_party/ocr/tesseract-ocr/kylin/loongarch64/include/tesseract/capi.h
+++ b/third_party/ocr/tesseract-ocr/kylin/loongarch64/include/tesseract/capi.h
@ -1,484 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// File:        capi.h
-// Description: C-API TessBaseAPI
-//
-// (C) Copyright 2012, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef API_CAPI_H_
-#define API_CAPI_H_
-
-#include "export.h"
-
-#ifdef __cplusplus
-#  include <tesseract/baseapi.h>
-#  include <tesseract/ocrclass.h>
-#  include <tesseract/pageiterator.h>
-#  include <tesseract/renderer.h>
-#  include <tesseract/resultiterator.h>
-#endif
-
-#include <stdbool.h>
-#include <stdio.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#ifndef BOOL
-#  define BOOL int
-#  define TRUE 1
-#  define FALSE 0
-#endif
-
-#ifdef __cplusplus
-typedef tesseract::TessResultRenderer TessResultRenderer;
-typedef tesseract::TessBaseAPI TessBaseAPI;
-typedef tesseract::PageIterator TessPageIterator;
-typedef tesseract::ResultIterator TessResultIterator;
-typedef tesseract::MutableIterator TessMutableIterator;
-typedef tesseract::ChoiceIterator TessChoiceIterator;
-typedef tesseract::OcrEngineMode TessOcrEngineMode;
-typedef tesseract::PageSegMode TessPageSegMode;
-typedef tesseract::PageIteratorLevel TessPageIteratorLevel;
-typedef tesseract::Orientation TessOrientation;
-typedef tesseract::ParagraphJustification TessParagraphJustification;
-typedef tesseract::WritingDirection TessWritingDirection;
-typedef tesseract::TextlineOrder TessTextlineOrder;
-typedef tesseract::PolyBlockType TessPolyBlockType;
-typedef tesseract::ETEXT_DESC ETEXT_DESC;
-#else
-typedef struct TessResultRenderer TessResultRenderer;
-typedef struct TessBaseAPI TessBaseAPI;
-typedef struct TessPageIterator TessPageIterator;
-typedef struct TessResultIterator TessResultIterator;
-typedef struct TessMutableIterator TessMutableIterator;
-typedef struct TessChoiceIterator TessChoiceIterator;
-typedef enum TessOcrEngineMode {
-  OEM_TESSERACT_ONLY,
-  OEM_LSTM_ONLY,
-  OEM_TESSERACT_LSTM_COMBINED,
-  OEM_DEFAULT
-} TessOcrEngineMode;
-typedef enum TessPageSegMode {
-  PSM_OSD_ONLY,
-  PSM_AUTO_OSD,
-  PSM_AUTO_ONLY,
-  PSM_AUTO,
-  PSM_SINGLE_COLUMN,
-  PSM_SINGLE_BLOCK_VERT_TEXT,
-  PSM_SINGLE_BLOCK,
-  PSM_SINGLE_LINE,
-  PSM_SINGLE_WORD,
-  PSM_CIRCLE_WORD,
-  PSM_SINGLE_CHAR,
-  PSM_SPARSE_TEXT,
-  PSM_SPARSE_TEXT_OSD,
-  PSM_RAW_LINE,
-  PSM_COUNT
-} TessPageSegMode;
-typedef enum TessPageIteratorLevel {
-  RIL_BLOCK,
-  RIL_PARA,
-  RIL_TEXTLINE,
-  RIL_WORD,
-  RIL_SYMBOL
-} TessPageIteratorLevel;
-typedef enum TessPolyBlockType {
-  PT_UNKNOWN,
-  PT_FLOWING_TEXT,
-  PT_HEADING_TEXT,
-  PT_PULLOUT_TEXT,
-  PT_EQUATION,
-  PT_INLINE_EQUATION,
-  PT_TABLE,
-  PT_VERTICAL_TEXT,
-  PT_CAPTION_TEXT,
-  PT_FLOWING_IMAGE,
-  PT_HEADING_IMAGE,
-  PT_PULLOUT_IMAGE,
-  PT_HORZ_LINE,
-  PT_VERT_LINE,
-  PT_NOISE,
-  PT_COUNT
-} TessPolyBlockType;
-typedef enum TessOrientation {
-  ORIENTATION_PAGE_UP,
-  ORIENTATION_PAGE_RIGHT,
-  ORIENTATION_PAGE_DOWN,
-  ORIENTATION_PAGE_LEFT
-} TessOrientation;
-typedef enum TessParagraphJustification {
-  JUSTIFICATION_UNKNOWN,
-  JUSTIFICATION_LEFT,
-  JUSTIFICATION_CENTER,
-  JUSTIFICATION_RIGHT
-} TessParagraphJustification;
-typedef enum TessWritingDirection {
-  WRITING_DIRECTION_LEFT_TO_RIGHT,
-  WRITING_DIRECTION_RIGHT_TO_LEFT,
-  WRITING_DIRECTION_TOP_TO_BOTTOM
-} TessWritingDirection;
-typedef enum TessTextlineOrder {
-  TEXTLINE_ORDER_LEFT_TO_RIGHT,
-  TEXTLINE_ORDER_RIGHT_TO_LEFT,
-  TEXTLINE_ORDER_TOP_TO_BOTTOM
-} TessTextlineOrder;
-typedef struct ETEXT_DESC ETEXT_DESC;
-#endif
-
-typedef bool (*TessCancelFunc)(void *cancel_this, int words);
-typedef bool (*TessProgressFunc)(ETEXT_DESC *ths, int left, int right, int top,
-                                 int bottom);
-
-struct Pix;
-struct Boxa;
-struct Pixa;
-
-/* General free functions */
-
-TESS_API const char *TessVersion();
-TESS_API void TessDeleteText(const char *text);
-TESS_API void TessDeleteTextArray(char **arr);
-TESS_API void TessDeleteIntArray(const int *arr);
-
-/* Renderer API */
-TESS_API TessResultRenderer *TessTextRendererCreate(const char *outputbase);
-TESS_API TessResultRenderer *TessHOcrRendererCreate(const char *outputbase);
-TESS_API TessResultRenderer *TessHOcrRendererCreate2(const char *outputbase,
-                                                     BOOL font_info);
-TESS_API TessResultRenderer *TessAltoRendererCreate(const char *outputbase);
-TESS_API TessResultRenderer *TessTsvRendererCreate(const char *outputbase);
-TESS_API TessResultRenderer *TessPDFRendererCreate(const char *outputbase,
-                                                   const char *datadir,
-                                                   BOOL textonly);
-TESS_API TessResultRenderer *TessUnlvRendererCreate(const char *outputbase);
-TESS_API TessResultRenderer *TessBoxTextRendererCreate(const char *outputbase);
-TESS_API TessResultRenderer *TessLSTMBoxRendererCreate(const char *outputbase);
-TESS_API TessResultRenderer *TessWordStrBoxRendererCreate(
-    const char *outputbase);
-
-TESS_API void TessDeleteResultRenderer(TessResultRenderer *renderer);
-TESS_API void TessResultRendererInsert(TessResultRenderer *renderer,
-                                       TessResultRenderer *next);
-TESS_API TessResultRenderer *TessResultRendererNext(
-    TessResultRenderer *renderer);
-TESS_API BOOL TessResultRendererBeginDocument(TessResultRenderer *renderer,
-                                              const char *title);
-TESS_API BOOL TessResultRendererAddImage(TessResultRenderer *renderer,
-                                         TessBaseAPI *api);
-TESS_API BOOL TessResultRendererEndDocument(TessResultRenderer *renderer);
-
-TESS_API const char *TessResultRendererExtention(TessResultRenderer *renderer);
-TESS_API const char *TessResultRendererTitle(TessResultRenderer *renderer);
-TESS_API int TessResultRendererImageNum(TessResultRenderer *renderer);
-
-/* Base API */
-
-TESS_API TessBaseAPI *TessBaseAPICreate();
-TESS_API void TessBaseAPIDelete(TessBaseAPI *handle);
-
-TESS_API size_t TessBaseAPIGetOpenCLDevice(TessBaseAPI *handle, void **device);
-
-TESS_API void TessBaseAPISetInputName(TessBaseAPI *handle, const char *name);
-TESS_API const char *TessBaseAPIGetInputName(TessBaseAPI *handle);
-
-TESS_API void TessBaseAPISetInputImage(TessBaseAPI *handle, struct Pix *pix);
-TESS_API struct Pix *TessBaseAPIGetInputImage(TessBaseAPI *handle);
-
-TESS_API int TessBaseAPIGetSourceYResolution(TessBaseAPI *handle);
-TESS_API const char *TessBaseAPIGetDatapath(TessBaseAPI *handle);
-
-TESS_API void TessBaseAPISetOutputName(TessBaseAPI *handle, const char *name);
-
-TESS_API BOOL TessBaseAPISetVariable(TessBaseAPI *handle, const char *name,
-                                     const char *value);
-TESS_API BOOL TessBaseAPISetDebugVariable(TessBaseAPI *handle, const char *name,
-                                          const char *value);
-
-TESS_API BOOL TessBaseAPIGetIntVariable(const TessBaseAPI *handle,
-                                        const char *name, int *value);
-TESS_API BOOL TessBaseAPIGetBoolVariable(const TessBaseAPI *handle,
-                                         const char *name, BOOL *value);
-TESS_API BOOL TessBaseAPIGetDoubleVariable(const TessBaseAPI *handle,
-                                           const char *name, double *value);
-TESS_API const char *TessBaseAPIGetStringVariable(const TessBaseAPI *handle,
-                                                  const char *name);
-
-TESS_API void TessBaseAPIPrintVariables(const TessBaseAPI *handle, FILE *fp);
-TESS_API BOOL TessBaseAPIPrintVariablesToFile(const TessBaseAPI *handle,
-                                              const char *filename);
-
-TESS_API int TessBaseAPIInit1(TessBaseAPI *handle, const char *datapath,
-                              const char *language, TessOcrEngineMode oem,
-                              char **configs, int configs_size);
-TESS_API int TessBaseAPIInit2(TessBaseAPI *handle, const char *datapath,
-                              const char *language, TessOcrEngineMode oem);
-TESS_API int TessBaseAPIInit3(TessBaseAPI *handle, const char *datapath,
-                              const char *language);
-
-TESS_API int TessBaseAPIInit4(TessBaseAPI *handle, const char *datapath,
-                              const char *language, TessOcrEngineMode mode,
-                              char **configs, int configs_size, char **vars_vec,
-                              char **vars_values, size_t vars_vec_size,
-                              BOOL set_only_non_debug_params);
-
-TESS_API int TessBaseAPIInit5(TessBaseAPI *handle, const char *data, int data_size,
-                              const char *language, TessOcrEngineMode mode,
-                              char **configs, int configs_size, char **vars_vec,
-                              char **vars_values, size_t vars_vec_size,
-                              BOOL set_only_non_debug_params);
-
-TESS_API const char *TessBaseAPIGetInitLanguagesAsString(
-    const TessBaseAPI *handle);
-TESS_API char **TessBaseAPIGetLoadedLanguagesAsVector(
-    const TessBaseAPI *handle);
-TESS_API char **TessBaseAPIGetAvailableLanguagesAsVector(
-    const TessBaseAPI *handle);
-
-TESS_API void TessBaseAPIInitForAnalysePage(TessBaseAPI *handle);
-
-TESS_API void TessBaseAPIReadConfigFile(TessBaseAPI *handle,
-                                        const char *filename);
-TESS_API void TessBaseAPIReadDebugConfigFile(TessBaseAPI *handle,
-                                             const char *filename);
-
-TESS_API void TessBaseAPISetPageSegMode(TessBaseAPI *handle,
-                                        TessPageSegMode mode);
-TESS_API TessPageSegMode TessBaseAPIGetPageSegMode(const TessBaseAPI *handle);
-
-TESS_API char *TessBaseAPIRect(TessBaseAPI *handle,
-                               const unsigned char *imagedata,
-                               int bytes_per_pixel, int bytes_per_line,
-                               int left, int top, int width, int height);
-
-TESS_API void TessBaseAPIClearAdaptiveClassifier(TessBaseAPI *handle);
-
-TESS_API void TessBaseAPISetImage(TessBaseAPI *handle,
-                                  const unsigned char *imagedata, int width,
-                                  int height, int bytes_per_pixel,
-                                  int bytes_per_line);
-TESS_API void TessBaseAPISetImage2(TessBaseAPI *handle, struct Pix *pix);
-
-TESS_API void TessBaseAPISetSourceResolution(TessBaseAPI *handle, int ppi);
-
-TESS_API void TessBaseAPISetRectangle(TessBaseAPI *handle, int left, int top,
-                                      int width, int height);
-
-TESS_API struct Pix *TessBaseAPIGetThresholdedImage(TessBaseAPI *handle);
-TESS_API struct Boxa *TessBaseAPIGetRegions(TessBaseAPI *handle,
-                                            struct Pixa **pixa);
-TESS_API struct Boxa *TessBaseAPIGetTextlines(TessBaseAPI *handle,
-                                              struct Pixa **pixa,
-                                              int **blockids);
-TESS_API struct Boxa *TessBaseAPIGetTextlines1(TessBaseAPI *handle,
-                                               BOOL raw_image, int raw_padding,
-                                               struct Pixa **pixa,
-                                               int **blockids, int **paraids);
-TESS_API struct Boxa *TessBaseAPIGetStrips(TessBaseAPI *handle,
-                                           struct Pixa **pixa, int **blockids);
-TESS_API struct Boxa *TessBaseAPIGetWords(TessBaseAPI *handle,
-                                          struct Pixa **pixa);
-TESS_API struct Boxa *TessBaseAPIGetConnectedComponents(TessBaseAPI *handle,
-                                                        struct Pixa **cc);
-TESS_API struct Boxa *TessBaseAPIGetComponentImages(TessBaseAPI *handle,
-                                                    TessPageIteratorLevel level,
-                                                    BOOL text_only,
-                                                    struct Pixa **pixa,
-                                                    int **blockids);
-TESS_API struct Boxa *TessBaseAPIGetComponentImages1(
-    TessBaseAPI *handle, TessPageIteratorLevel level, BOOL text_only,
-    BOOL raw_image, int raw_padding, struct Pixa **pixa, int **blockids,
-    int **paraids);
-
-TESS_API int TessBaseAPIGetThresholdedImageScaleFactor(
-    const TessBaseAPI *handle);
-
-TESS_API TessPageIterator *TessBaseAPIAnalyseLayout(TessBaseAPI *handle);
-
-TESS_API int TessBaseAPIRecognize(TessBaseAPI *handle, ETEXT_DESC *monitor);
-
-TESS_API BOOL TessBaseAPIProcessPages(TessBaseAPI *handle, const char *filename,
-                                      const char *retry_config,
-                                      int timeout_millisec,
-                                      TessResultRenderer *renderer);
-TESS_API BOOL TessBaseAPIProcessPage(TessBaseAPI *handle, struct Pix *pix,
-                                     int page_index, const char *filename,
-                                     const char *retry_config,
-                                     int timeout_millisec,
-                                     TessResultRenderer *renderer);
-
-TESS_API TessResultIterator *TessBaseAPIGetIterator(TessBaseAPI *handle);
-TESS_API TessMutableIterator *TessBaseAPIGetMutableIterator(
-    TessBaseAPI *handle);
-
-TESS_API char *TessBaseAPIGetUTF8Text(TessBaseAPI *handle);
-TESS_API char *TessBaseAPIGetHOCRText(TessBaseAPI *handle, int page_number);
-
-TESS_API char *TessBaseAPIGetAltoText(TessBaseAPI *handle, int page_number);
-TESS_API char *TessBaseAPIGetTsvText(TessBaseAPI *handle, int page_number);
-
-TESS_API char *TessBaseAPIGetBoxText(TessBaseAPI *handle, int page_number);
-TESS_API char *TessBaseAPIGetLSTMBoxText(TessBaseAPI *handle, int page_number);
-TESS_API char *TessBaseAPIGetWordStrBoxText(TessBaseAPI *handle,
-                                            int page_number);
-
-TESS_API char *TessBaseAPIGetUNLVText(TessBaseAPI *handle);
-TESS_API int TessBaseAPIMeanTextConf(TessBaseAPI *handle);
-
-TESS_API int *TessBaseAPIAllWordConfidences(TessBaseAPI *handle);
-
-#ifndef DISABLED_LEGACY_ENGINE
-TESS_API BOOL TessBaseAPIAdaptToWordStr(TessBaseAPI *handle,
-                                        TessPageSegMode mode,
-                                        const char *wordstr);
-#endif // #ifndef DISABLED_LEGACY_ENGINE
-
-TESS_API void TessBaseAPIClear(TessBaseAPI *handle);
-TESS_API void TessBaseAPIEnd(TessBaseAPI *handle);
-
-TESS_API int TessBaseAPIIsValidWord(TessBaseAPI *handle, const char *word);
-TESS_API BOOL TessBaseAPIGetTextDirection(TessBaseAPI *handle, int *out_offset,
-                                          float *out_slope);
-
-TESS_API const char *TessBaseAPIGetUnichar(TessBaseAPI *handle, int unichar_id);
-
-TESS_API void TessBaseAPIClearPersistentCache(TessBaseAPI *handle);
-
-#ifndef DISABLED_LEGACY_ENGINE
-
-// Call TessDeleteText(*best_script_name) to free memory allocated by this
-// function
-TESS_API BOOL TessBaseAPIDetectOrientationScript(TessBaseAPI *handle,
-                                                 int *orient_deg,
-                                                 float *orient_conf,
-                                                 const char **script_name,
-                                                 float *script_conf);
-#endif // #ifndef DISABLED_LEGACY_ENGINE
-
-TESS_API void TessBaseAPISetMinOrientationMargin(TessBaseAPI *handle,
-                                                 double margin);
-
-TESS_API int TessBaseAPINumDawgs(const TessBaseAPI *handle);
-
-TESS_API TessOcrEngineMode TessBaseAPIOem(const TessBaseAPI *handle);
-
-TESS_API void TessBaseGetBlockTextOrientations(TessBaseAPI *handle,
-                                               int **block_orientation,
-                                               bool **vertical_writing);
-
-/* Page iterator */
-
-TESS_API void TessPageIteratorDelete(TessPageIterator *handle);
-
-TESS_API TessPageIterator *TessPageIteratorCopy(const TessPageIterator *handle);
-
-TESS_API void TessPageIteratorBegin(TessPageIterator *handle);
-
-TESS_API BOOL TessPageIteratorNext(TessPageIterator *handle,
-                                   TessPageIteratorLevel level);
-
-TESS_API BOOL TessPageIteratorIsAtBeginningOf(const TessPageIterator *handle,
-                                              TessPageIteratorLevel level);
-
-TESS_API BOOL TessPageIteratorIsAtFinalElement(const TessPageIterator *handle,
-                                               TessPageIteratorLevel level,
-                                               TessPageIteratorLevel element);
-
-TESS_API BOOL TessPageIteratorBoundingBox(const TessPageIterator *handle,
-                                          TessPageIteratorLevel level,
-                                          int *left, int *top, int *right,
-                                          int *bottom);
-
-TESS_API TessPolyBlockType
-TessPageIteratorBlockType(const TessPageIterator *handle);
-
-TESS_API struct Pix *TessPageIteratorGetBinaryImage(
-    const TessPageIterator *handle, TessPageIteratorLevel level);
-
-TESS_API struct Pix *TessPageIteratorGetImage(const TessPageIterator *handle,
-                                              TessPageIteratorLevel level,
-                                              int padding,
-                                              struct Pix *original_image,
-                                              int *left, int *top);
-
-TESS_API BOOL TessPageIteratorBaseline(const TessPageIterator *handle,
-                                       TessPageIteratorLevel level, int *x1,
-                                       int *y1, int *x2, int *y2);
-
-TESS_API void TessPageIteratorOrientation(
-    TessPageIterator *handle, TessOrientation *orientation,
-    TessWritingDirection *writing_direction, TessTextlineOrder *textline_order,
-    float *deskew_angle);
-
-TESS_API void TessPageIteratorParagraphInfo(
-    TessPageIterator *handle, TessParagraphJustification *justification,
-    BOOL *is_list_item, BOOL *is_crown, int *first_line_indent);
-
-/* Result iterator */
-
-TESS_API void TessResultIteratorDelete(TessResultIterator *handle);
-TESS_API TessResultIterator *TessResultIteratorCopy(
-    const TessResultIterator *handle);
-TESS_API TessPageIterator *TessResultIteratorGetPageIterator(
-    TessResultIterator *handle);
-TESS_API const TessPageIterator *TessResultIteratorGetPageIteratorConst(
-    const TessResultIterator *handle);
-TESS_API TessChoiceIterator *TessResultIteratorGetChoiceIterator(
-    const TessResultIterator *handle);
-
-TESS_API BOOL TessResultIteratorNext(TessResultIterator *handle,
-                                     TessPageIteratorLevel level);
-TESS_API char *TessResultIteratorGetUTF8Text(const TessResultIterator *handle,
-                                             TessPageIteratorLevel level);
-TESS_API float TessResultIteratorConfidence(const TessResultIterator *handle,
-                                            TessPageIteratorLevel level);
-TESS_API const char *TessResultIteratorWordRecognitionLanguage(
-    const TessResultIterator *handle);
-TESS_API const char *TessResultIteratorWordFontAttributes(
-    const TessResultIterator *handle, BOOL *is_bold, BOOL *is_italic,
-    BOOL *is_underlined, BOOL *is_monospace, BOOL *is_serif, BOOL *is_smallcaps,
-    int *pointsize, int *font_id);
-
-TESS_API BOOL
-TessResultIteratorWordIsFromDictionary(const TessResultIterator *handle);
-TESS_API BOOL TessResultIteratorWordIsNumeric(const TessResultIterator *handle);
-TESS_API BOOL
-TessResultIteratorSymbolIsSuperscript(const TessResultIterator *handle);
-TESS_API BOOL
-TessResultIteratorSymbolIsSubscript(const TessResultIterator *handle);
-TESS_API BOOL
-TessResultIteratorSymbolIsDropcap(const TessResultIterator *handle);
-
-TESS_API void TessChoiceIteratorDelete(TessChoiceIterator *handle);
-TESS_API BOOL TessChoiceIteratorNext(TessChoiceIterator *handle);
-TESS_API const char *TessChoiceIteratorGetUTF8Text(
-    const TessChoiceIterator *handle);
-TESS_API float TessChoiceIteratorConfidence(const TessChoiceIterator *handle);
-
-/* Progress monitor */
-
-TESS_API ETEXT_DESC *TessMonitorCreate();
-TESS_API void TessMonitorDelete(ETEXT_DESC *monitor);
-TESS_API void TessMonitorSetCancelFunc(ETEXT_DESC *monitor,
-                                       TessCancelFunc cancelFunc);
-TESS_API void TessMonitorSetCancelThis(ETEXT_DESC *monitor, void *cancelThis);
-TESS_API void *TessMonitorGetCancelThis(ETEXT_DESC *monitor);
-TESS_API void TessMonitorSetProgressFunc(ETEXT_DESC *monitor,
-                                         TessProgressFunc progressFunc);
-TESS_API int TessMonitorGetProgress(ETEXT_DESC *monitor);
-TESS_API void TessMonitorSetDeadlineMSecs(ETEXT_DESC *monitor, int deadline);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif // API_CAPI_H_
--- a/third_party/ocr/tesseract-ocr/kylin/loongarch64/include/tesseract/export.h
+++ b/third_party/ocr/tesseract-ocr/kylin/loongarch64/include/tesseract/export.h
@ -1,37 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// File:        export.h
-// Description: Place holder
-//
-// (C) Copyright 2006, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef TESSERACT_PLATFORM_H_
-#define TESSERACT_PLATFORM_H_
-
-#ifndef TESS_API
-#  if defined(_WIN32) || defined(__CYGWIN__)
-#    if defined(TESS_EXPORTS)
-#      define TESS_API __declspec(dllexport)
-#    elif defined(TESS_IMPORTS)
-#      define TESS_API __declspec(dllimport)
-#    else
-#      define TESS_API
-#    endif
-#  else
-#    if defined(TESS_EXPORTS) || defined(TESS_IMPORTS)
-#      define TESS_API __attribute__((visibility("default")))
-#    else
-#      define TESS_API
-#    endif
-#  endif
-#endif
-
-#endif // TESSERACT_PLATFORM_H_
--- a/third_party/ocr/tesseract-ocr/kylin/loongarch64/include/tesseract/ltrresultiterator.h
+++ b/third_party/ocr/tesseract-ocr/kylin/loongarch64/include/tesseract/ltrresultiterator.h
@ -1,235 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// File:        ltrresultiterator.h
-// Description: Iterator for tesseract results in strict left-to-right
-//              order that avoids using tesseract internal data structures.
-// Author:      Ray Smith
-//
-// (C) Copyright 2010, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H_
-#define TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H_
-
-#include "export.h"       // for TESS_API
-#include "pageiterator.h" // for PageIterator
-#include "publictypes.h"  // for PageIteratorLevel
-#include "unichar.h"      // for StrongScriptDirection
-
-namespace tesseract {
-
-class BLOB_CHOICE_IT;
-class PAGE_RES;
-class WERD_RES;
-
-class Tesseract;
-
-// Class to iterate over tesseract results, providing access to all levels
-// of the page hierarchy, without including any tesseract headers or having
-// to handle any tesseract structures.
-// WARNING! This class points to data held within the TessBaseAPI class, and
-// therefore can only be used while the TessBaseAPI class still exists and
-// has not been subjected to a call of Init, SetImage, Recognize, Clear, End
-// DetectOS, or anything else that changes the internal PAGE_RES.
-// See tesseract/publictypes.h for the definition of PageIteratorLevel.
-// See also base class PageIterator, which contains the bulk of the interface.
-// LTRResultIterator adds text-specific methods for access to OCR output.
-
-class TESS_API LTRResultIterator : public PageIterator {
-  friend class ChoiceIterator;
-
-public:
-  // page_res and tesseract come directly from the BaseAPI.
-  // The rectangle parameters are copied indirectly from the Thresholder,
-  // via the BaseAPI. They represent the coordinates of some rectangle in an
-  // original image (in top-left-origin coordinates) and therefore the top-left
-  // needs to be added to any output boxes in order to specify coordinates
-  // in the original image. See TessBaseAPI::SetRectangle.
-  // The scale and scaled_yres are in case the Thresholder scaled the image
-  // rectangle prior to thresholding. Any coordinates in tesseract's image
-  // must be divided by scale before adding (rect_left, rect_top).
-  // The scaled_yres indicates the effective resolution of the binary image
-  // that tesseract has been given by the Thresholder.
-  // After the constructor, Begin has already been called.
-  LTRResultIterator(PAGE_RES *page_res, Tesseract *tesseract, int scale,
-                    int scaled_yres, int rect_left, int rect_top,
-                    int rect_width, int rect_height);
-
-  ~LTRResultIterator() override;
-
-  // LTRResultIterators may be copied! This makes it possible to iterate over
-  // all the objects at a lower level, while maintaining an iterator to
-  // objects at a higher level. These constructors DO NOT CALL Begin, so
-  // iterations will continue from the location of src.
-  // TODO: For now the copy constructor and operator= only need the base class
-  // versions, but if new data members are added, don't forget to add them!
-
-  // ============= Moving around within the page ============.
-
-  // See PageIterator.
-
-  // ============= Accessing data ==============.
-
-  // Returns the null terminated UTF-8 encoded text string for the current
-  // object at the given level. Use delete [] to free after use.
-  char *GetUTF8Text(PageIteratorLevel level) const;
-
-  // Set the string inserted at the end of each text line. "\n" by default.
-  void SetLineSeparator(const char *new_line);
-
-  // Set the string inserted at the end of each paragraph. "\n" by default.
-  void SetParagraphSeparator(const char *new_para);
-
-  // Returns the mean confidence of the current object at the given level.
-  // The number should be interpreted as a percent probability. (0.0f-100.0f)
-  float Confidence(PageIteratorLevel level) const;
-
-  // ============= Functions that refer to words only ============.
-
-  // Returns the font attributes of the current word. If iterating at a higher
-  // level object than words, eg textlines, then this will return the
-  // attributes of the first word in that textline.
-  // The actual return value is a string representing a font name. It points
-  // to an internal table and SHOULD NOT BE DELETED. Lifespan is the same as
-  // the iterator itself, ie rendered invalid by various members of
-  // TessBaseAPI, including Init, SetImage, End or deleting the TessBaseAPI.
-  // Pointsize is returned in printers points (1/72 inch.)
-  const char *WordFontAttributes(bool *is_bold, bool *is_italic,
-                                 bool *is_underlined, bool *is_monospace,
-                                 bool *is_serif, bool *is_smallcaps,
-                                 int *pointsize, int *font_id) const;
-
-  // Return the name of the language used to recognize this word.
-  // On error, nullptr.  Do not delete this pointer.
-  const char *WordRecognitionLanguage() const;
-
-  // Return the overall directionality of this word.
-  StrongScriptDirection WordDirection() const;
-
-  // Returns true if the current word was found in a dictionary.
-  bool WordIsFromDictionary() const;
-
-  // Returns the number of blanks before the current word.
-  int BlanksBeforeWord() const;
-
-  // Returns true if the current word is numeric.
-  bool WordIsNumeric() const;
-
-  // Returns true if the word contains blamer information.
-  bool HasBlamerInfo() const;
-
-  // Returns the pointer to ParamsTrainingBundle stored in the BlamerBundle
-  // of the current word.
-  const void *GetParamsTrainingBundle() const;
-
-  // Returns a pointer to the string with blamer information for this word.
-  // Assumes that the word's blamer_bundle is not nullptr.
-  const char *GetBlamerDebug() const;
-
-  // Returns a pointer to the string with misadaption information for this word.
-  // Assumes that the word's blamer_bundle is not nullptr.
-  const char *GetBlamerMisadaptionDebug() const;
-
-  // Returns true if a truth string was recorded for the current word.
-  bool HasTruthString() const;
-
-  // Returns true if the given string is equivalent to the truth string for
-  // the current word.
-  bool EquivalentToTruth(const char *str) const;
-
-  // Returns a null terminated UTF-8 encoded truth string for the current word.
-  // Use delete [] to free after use.
-  char *WordTruthUTF8Text() const;
-
-  // Returns a null terminated UTF-8 encoded normalized OCR string for the
-  // current word. Use delete [] to free after use.
-  char *WordNormedUTF8Text() const;
-
-  // Returns a pointer to serialized choice lattice.
-  // Fills lattice_size with the number of bytes in lattice data.
-  const char *WordLattice(int *lattice_size) const;
-
-  // ============= Functions that refer to symbols only ============.
-
-  // Returns true if the current symbol is a superscript.
-  // If iterating at a higher level object than symbols, eg words, then
-  // this will return the attributes of the first symbol in that word.
-  bool SymbolIsSuperscript() const;
-  // Returns true if the current symbol is a subscript.
-  // If iterating at a higher level object than symbols, eg words, then
-  // this will return the attributes of the first symbol in that word.
-  bool SymbolIsSubscript() const;
-  // Returns true if the current symbol is a dropcap.
-  // If iterating at a higher level object than symbols, eg words, then
-  // this will return the attributes of the first symbol in that word.
-  bool SymbolIsDropcap() const;
-
-protected:
-  const char *line_separator_;
-  const char *paragraph_separator_;
-};
-
-// Class to iterate over the classifier choices for a single RIL_SYMBOL.
-class TESS_API ChoiceIterator {
-public:
-  // Construction is from a LTRResultIterator that points to the symbol of
-  // interest. The ChoiceIterator allows a one-shot iteration over the
-  // choices for this symbol and after that it is useless.
-  explicit ChoiceIterator(const LTRResultIterator &result_it);
-  ~ChoiceIterator();
-
-  // Moves to the next choice for the symbol and returns false if there
-  // are none left.
-  bool Next();
-
-  // ============= Accessing data ==============.
-
-  // Returns the null terminated UTF-8 encoded text string for the current
-  // choice.
-  // NOTE: Unlike LTRResultIterator::GetUTF8Text, the return points to an
-  // internal structure and should NOT be delete[]ed to free after use.
-  const char *GetUTF8Text() const;
-
-  // Returns the confidence of the current choice depending on the used language
-  // data. If only LSTM traineddata is used the value range is 0.0f - 1.0f. All
-  // choices for one symbol should roughly add up to 1.0f.
-  // If only traineddata of the legacy engine is used, the number should be
-  // interpreted as a percent probability. (0.0f-100.0f) In this case
-  // probabilities won't add up to 100. Each one stands on its own.
-  float Confidence() const;
-
-  // Returns a vector containing all timesteps, which belong to the currently
-  // selected symbol. A timestep is a vector containing pairs of symbols and
-  // floating point numbers. The number states the probability for the
-  // corresponding symbol.
-  std::vector<std::vector<std::pair<const char *, float>>> *Timesteps() const;
-
-private:
-  // clears the remaining spaces out of the results and adapt the probabilities
-  void filterSpaces();
-  // Pointer to the WERD_RES object owned by the API.
-  WERD_RES *word_res_;
-  // Iterator over the blob choices.
-  BLOB_CHOICE_IT *choice_it_;
-  std::vector<std::pair<const char *, float>> *LSTM_choices_ = nullptr;
-  std::vector<std::pair<const char *, float>>::iterator LSTM_choice_it_;
-
-  const int *tstep_index_;
-  // regulates the rating granularity
-  double rating_coefficient_;
-  // leading blanks
-  int blanks_before_word_;
-  // true when there is lstm engine related trained data
-  bool oemLSTM_;
-};
-
-} // namespace tesseract.
-
-#endif // TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H_
--- a/third_party/ocr/tesseract-ocr/kylin/loongarch64/include/tesseract/ocrclass.h
+++ b/third_party/ocr/tesseract-ocr/kylin/loongarch64/include/tesseract/ocrclass.h
@ -1,158 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-/**********************************************************************
- * File:        ocrclass.h
- * Description: Class definitions and constants for the OCR API.
- * Author:      Hewlett-Packard Co
- *
- * (C) Copyright 1996, Hewlett-Packard Co.
- ** Licensed under the Apache License, Version 2.0 (the "License");
- ** you may not use this file except in compliance with the License.
- ** You may obtain a copy of the License at
- ** http://www.apache.org/licenses/LICENSE-2.0
- ** Unless required by applicable law or agreed to in writing, software
- ** distributed under the License is distributed on an "AS IS" BASIS,
- ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- ** See the License for the specific language governing permissions and
- ** limitations under the License.
- *
- **********************************************************************/
-
-/**********************************************************************
- * This file contains typedefs for all the structures used by
- * the HP OCR interface.
- * The structures are designed to allow them to be used with any
- * structure alignment up to 8.
- **********************************************************************/
-
-#ifndef CCUTIL_OCRCLASS_H_
-#define CCUTIL_OCRCLASS_H_
-
-#include <chrono>
-#include <ctime>
-
-namespace tesseract {
-
-/**********************************************************************
- * EANYCODE_CHAR
- * Description of a single character. The character code is defined by
- * the character set of the current font.
- * Output text is sent as an array of these structures.
- * Spaces and line endings in the output are represented in the
- * structures of the surrounding characters. They are not directly
- * represented as characters.
- * The first character in a word has a positive value of blanks.
- * Missing information should be set to the defaults in the comments.
- * If word bounds are known, but not character bounds, then the top and
- * bottom of each character should be those of the word. The left of the
- * first and right of the last char in each word should be set. All other
- * lefts and rights should be set to -1.
- * If set, the values of right and bottom are left+width and top+height.
- * Most of the members come directly from the parameters to ocr_append_char.
- * The formatting member uses the enhancement parameter and combines the
- * line direction stuff into the top 3 bits.
- * The coding is 0=RL char, 1=LR char, 2=DR NL, 3=UL NL, 4=DR Para,
- * 5=UL Para, 6=TB char, 7=BT char. API users do not need to know what
- * the coding is, only that it is backwards compatible with the previous
- * version.
- **********************************************************************/
-
-struct EANYCODE_CHAR { /*single character */
-  // It should be noted that the format for char_code for version 2.0 and beyond
-  // is UTF8 which means that ASCII characters will come out as one structure
-  // but other characters will be returned in two or more instances of this
-  // structure with a single byte of the UTF8 code in each, but each will have
-  // the same bounding box. Programs which want to handle languages with
-  // different character sets will need to handle extended characters
-  // appropriately, but *all* code needs to be prepared to receive UTF8 coded
-  // characters for characters such as bullet and fancy quotes.
-  uint16_t char_code; /*character itself */
-  int16_t left;       /*of char (-1) */
-  int16_t right;      /*of char (-1) */
-  int16_t top;        /*of char (-1) */
-  int16_t bottom;     /*of char (-1) */
-  int16_t font_index; /*what font (0) */
-  uint8_t confidence; /*0=perfect, 100=reject (0/100) */
-  uint8_t point_size; /*of char, 72=1 inch, (10) */
-  int8_t blanks;      /*no of spaces before this char (1) */
-  uint8_t formatting; /*char formatting (0) */
-};
-
-/**********************************************************************
- * ETEXT_DESC
- * Description of the output of the OCR engine.
- * This structure is used as both a progress monitor and the final
- * output header, since it needs to be a valid progress monitor while
- * the OCR engine is storing its output to shared memory.
- * During progress, all the buffer info is -1.
- * Progress starts at 0 and increases to 100 during OCR. No other constraint.
- * Additionally the progress callback contains the bounding box of the word that
- * is currently being processed.
- * Every progress callback, the OCR engine must set ocr_alive to 1.
- * The HP side will set ocr_alive to 0. Repeated failure to reset
- * to 1 indicates that the OCR engine is dead.
- * If the cancel function is not null then it is called with the number of
- * user words found. If it returns true then operation is cancelled.
- **********************************************************************/
-class ETEXT_DESC;
-
-using CANCEL_FUNC = bool (*)(void *, int);
-using PROGRESS_FUNC = bool (*)(int, int, int, int, int);
-using PROGRESS_FUNC2 = bool (*)(ETEXT_DESC *, int, int, int, int);
-
-class ETEXT_DESC { // output header
-public:
-  int16_t count{0};    /// chars in this buffer(0)
-  int16_t progress{0}; /// percent complete increasing (0-100)
-  /** Progress monitor covers word recognition and it does not cover layout
-   * analysis.
-   * See Ray comment in https://github.com/tesseract-ocr/tesseract/pull/27 */
-  int8_t more_to_come{0};       /// true if not last
-  volatile int8_t ocr_alive{0}; /// ocr sets to 1, HP 0
-  int8_t err_code{0};           /// for errcode use
-  CANCEL_FUNC cancel{nullptr};  /// returns true to cancel
-  PROGRESS_FUNC progress_callback{
-      nullptr};                      /// called whenever progress increases
-  PROGRESS_FUNC2 progress_callback2; /// monitor-aware progress callback
-  void *cancel_this{nullptr};        /// this or other data for cancel
-  std::chrono::steady_clock::time_point end_time;
-  /// Time to stop. Expected to be set only
-  /// by call to set_deadline_msecs().
-  EANYCODE_CHAR text[1]{}; /// character data
-
-  ETEXT_DESC() : progress_callback2(&default_progress_func) {
-    end_time = std::chrono::time_point<std::chrono::steady_clock,
-                                       std::chrono::milliseconds>();
-  }
-
-  // Sets the end time to be deadline_msecs milliseconds from now.
-  void set_deadline_msecs(int32_t deadline_msecs) {
-    if (deadline_msecs > 0) {
-      end_time = std::chrono::steady_clock::now() +
-                 std::chrono::milliseconds(deadline_msecs);
-    }
-  }
-
-  // Returns false if we've not passed the end_time, or have not set a deadline.
-  bool deadline_exceeded() const {
-    if (end_time.time_since_epoch() ==
-        std::chrono::steady_clock::duration::zero()) {
-      return false;
-    }
-    auto now = std::chrono::steady_clock::now();
-    return (now > end_time);
-  }
-
-private:
-  static bool default_progress_func(ETEXT_DESC *ths, int left, int right,
-                                    int top, int bottom) {
-    if (ths->progress_callback != nullptr) {
-      return (*(ths->progress_callback))(ths->progress, left, right, top,
-                                         bottom);
-    }
-    return true;
-  }
-};
-
-} // namespace tesseract
-
-#endif // CCUTIL_OCRCLASS_H_
--- a/third_party/ocr/tesseract-ocr/kylin/loongarch64/include/tesseract/osdetect.h
+++ b/third_party/ocr/tesseract-ocr/kylin/loongarch64/include/tesseract/osdetect.h
@ -1,139 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// File:        osdetect.h
-// Description: Orientation and script detection.
-// Author:      Samuel Charron
-//              Ranjith Unnikrishnan
-//
-// (C) Copyright 2008, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef TESSERACT_CCMAIN_OSDETECT_H_
-#define TESSERACT_CCMAIN_OSDETECT_H_
-
-#include "export.h" // for TESS_API
-
-#include <vector> // for std::vector
-
-namespace tesseract {
-
-class BLOBNBOX;
-class BLOBNBOX_CLIST;
-class BLOB_CHOICE_LIST;
-class TO_BLOCK_LIST;
-class UNICHARSET;
-
-class Tesseract;
-
-// Max number of scripts in ICU + "NULL" + Japanese and Korean + Fraktur
-const int kMaxNumberOfScripts = 116 + 1 + 2 + 1;
-
-struct OSBestResult {
-  OSBestResult()
-      : orientation_id(0), script_id(0), sconfidence(0.0), oconfidence(0.0) {}
-  int orientation_id;
-  int script_id;
-  float sconfidence;
-  float oconfidence;
-};
-
-struct OSResults {
-  OSResults() : unicharset(nullptr) {
-    for (int i = 0; i < 4; ++i) {
-      for (int j = 0; j < kMaxNumberOfScripts; ++j) {
-        scripts_na[i][j] = 0;
-      }
-      orientations[i] = 0;
-    }
-  }
-  void update_best_orientation();
-  // Set the estimate of the orientation to the given id.
-  void set_best_orientation(int orientation_id);
-  // Update/Compute the best estimate of the script assuming the given
-  // orientation id.
-  void update_best_script(int orientation_id);
-  // Return the index of the script with the highest score for this orientation.
-  TESS_API int get_best_script(int orientation_id) const;
-  // Accumulate scores with given OSResults instance and update the best script.
-  void accumulate(const OSResults &osr);
-
-  // Print statistics.
-  void print_scores(void) const;
-  void print_scores(int orientation_id) const;
-
-  // Array holding scores for each orientation id [0,3].
-  // Orientation ids [0..3] map to [0, 270, 180, 90] degree orientations of the
-  // page respectively, where the values refer to the amount of clockwise
-  // rotation to be applied to the page for the text to be upright and readable.
-  float orientations[4];
-  // Script confidence scores for each of 4 possible orientations.
-  float scripts_na[4][kMaxNumberOfScripts];
-
-  UNICHARSET *unicharset;
-  OSBestResult best_result;
-};
-
-class OrientationDetector {
-public:
-  OrientationDetector(const std::vector<int> *allowed_scripts,
-                      OSResults *results);
-  bool detect_blob(BLOB_CHOICE_LIST *scores);
-  int get_orientation();
-
-private:
-  OSResults *osr_;
-  const std::vector<int> *allowed_scripts_;
-};
-
-class ScriptDetector {
-public:
-  ScriptDetector(const std::vector<int> *allowed_scripts, OSResults *osr,
-                 tesseract::Tesseract *tess);
-  void detect_blob(BLOB_CHOICE_LIST *scores);
-  bool must_stop(int orientation) const;
-
-private:
-  OSResults *osr_;
-  static const char *korean_script_;
-  static const char *japanese_script_;
-  static const char *fraktur_script_;
-  int korean_id_;
-  int japanese_id_;
-  int katakana_id_;
-  int hiragana_id_;
-  int han_id_;
-  int hangul_id_;
-  int latin_id_;
-  int fraktur_id_;
-  tesseract::Tesseract *tess_;
-  const std::vector<int> *allowed_scripts_;
-};
-
-int orientation_and_script_detection(const char *filename, OSResults *,
-                                     tesseract::Tesseract *);
-
-int os_detect(TO_BLOCK_LIST *port_blocks, OSResults *osr,
-              tesseract::Tesseract *tess);
-
-int os_detect_blobs(const std::vector<int> *allowed_scripts,
-                    BLOBNBOX_CLIST *blob_list, OSResults *osr,
-                    tesseract::Tesseract *tess);
-
-bool os_detect_blob(BLOBNBOX *bbox, OrientationDetector *o, ScriptDetector *s,
-                    OSResults *, tesseract::Tesseract *tess);
-
-// Helper method to convert an orientation index to its value in degrees.
-// The value represents the amount of clockwise rotation in degrees that must be
-// applied for the text to be upright (readable).
-TESS_API int OrientationIdToValue(const int &id);
-
-} // namespace tesseract
-
-#endif // TESSERACT_CCMAIN_OSDETECT_H_
--- a/third_party/ocr/tesseract-ocr/kylin/loongarch64/include/tesseract/pageiterator.h
+++ b/third_party/ocr/tesseract-ocr/kylin/loongarch64/include/tesseract/pageiterator.h
@ -1,364 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// File:        pageiterator.h
-// Description: Iterator for tesseract page structure that avoids using
-//              tesseract internal data structures.
-// Author:      Ray Smith
-//
-// (C) Copyright 2010, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef TESSERACT_CCMAIN_PAGEITERATOR_H_
-#define TESSERACT_CCMAIN_PAGEITERATOR_H_
-
-#include "export.h"
-#include "publictypes.h"
-
-struct Pix;
-struct Pta;
-
-namespace tesseract {
-
-struct BlamerBundle;
-class C_BLOB_IT;
-class PAGE_RES;
-class PAGE_RES_IT;
-class WERD;
-
-class Tesseract;
-
-/**
- * Class to iterate over tesseract page structure, providing access to all
- * levels of the page hierarchy, without including any tesseract headers or
- * having to handle any tesseract structures.
- * WARNING! This class points to data held within the TessBaseAPI class, and
- * therefore can only be used while the TessBaseAPI class still exists and
- * has not been subjected to a call of Init, SetImage, Recognize, Clear, End,
- * DetectOS, or anything else that changes the internal PAGE_RES.
- * See tesseract/publictypes.h for the definition of PageIteratorLevel.
- * See also ResultIterator, derived from PageIterator, which adds in the
- * ability to access OCR output with text-specific methods.
- */
-
-class TESS_API PageIterator {
-public:
-  /**
-   * page_res and tesseract come directly from the BaseAPI.
-   * The rectangle parameters are copied indirectly from the Thresholder,
-   * via the BaseAPI. They represent the coordinates of some rectangle in an
-   * original image (in top-left-origin coordinates) and therefore the top-left
-   * needs to be added to any output boxes in order to specify coordinates
-   * in the original image. See TessBaseAPI::SetRectangle.
-   * The scale and scaled_yres are in case the Thresholder scaled the image
-   * rectangle prior to thresholding. Any coordinates in tesseract's image
-   * must be divided by scale before adding (rect_left, rect_top).
-   * The scaled_yres indicates the effective resolution of the binary image
-   * that tesseract has been given by the Thresholder.
-   * After the constructor, Begin has already been called.
-   */
-  PageIterator(PAGE_RES *page_res, Tesseract *tesseract, int scale,
-               int scaled_yres, int rect_left, int rect_top, int rect_width,
-               int rect_height);
-  virtual ~PageIterator();
-
-  /**
-   * Page/ResultIterators may be copied! This makes it possible to iterate over
-   * all the objects at a lower level, while maintaining an iterator to
-   * objects at a higher level. These constructors DO NOT CALL Begin, so
-   * iterations will continue from the location of src.
-   */
-  PageIterator(const PageIterator &src);
-  const PageIterator &operator=(const PageIterator &src);
-
-  /** Are we positioned at the same location as other? */
-  bool PositionedAtSameWord(const PAGE_RES_IT *other) const;
-
-  // ============= Moving around within the page ============.
-
-  /**
-   * Moves the iterator to point to the start of the page to begin an
-   * iteration.
-   */
-  virtual void Begin();
-
-  /**
-   * Moves the iterator to the beginning of the paragraph.
-   * This class implements this functionality by moving it to the zero indexed
-   * blob of the first (leftmost) word on the first row of the paragraph.
-   */
-  virtual void RestartParagraph();
-
-  /**
-   * Return whether this iterator points anywhere in the first textline of a
-   * paragraph.
-   */
-  bool IsWithinFirstTextlineOfParagraph() const;
-
-  /**
-   * Moves the iterator to the beginning of the text line.
-   * This class implements this functionality by moving it to the zero indexed
-   * blob of the first (leftmost) word of the row.
-   */
-  virtual void RestartRow();
-
-  /**
-   * Moves to the start of the next object at the given level in the
-   * page hierarchy, and returns false if the end of the page was reached.
-   * NOTE that RIL_SYMBOL will skip non-text blocks, but all other
-   * PageIteratorLevel level values will visit each non-text block once.
-   * Think of non text blocks as containing a single para, with a single line,
-   * with a single imaginary word.
-   * Calls to Next with different levels may be freely intermixed.
-   * This function iterates words in right-to-left scripts correctly, if
-   * the appropriate language has been loaded into Tesseract.
-   */
-  virtual bool Next(PageIteratorLevel level);
-
-  /**
-   * Returns true if the iterator is at the start of an object at the given
-   * level.
-   *
-   * For instance, suppose an iterator it is pointed to the first symbol of the
-   * first word of the third line of the second paragraph of the first block in
-   * a page, then:
-   *   it.IsAtBeginningOf(RIL_BLOCK) = false
-   *   it.IsAtBeginningOf(RIL_PARA) = false
-   *   it.IsAtBeginningOf(RIL_TEXTLINE) = true
-   *   it.IsAtBeginningOf(RIL_WORD) = true
-   *   it.IsAtBeginningOf(RIL_SYMBOL) = true
-   */
-  virtual bool IsAtBeginningOf(PageIteratorLevel level) const;
-
-  /**
-   * Returns whether the iterator is positioned at the last element in a
-   * given level. (e.g. the last word in a line, the last line in a block)
-   *
-   *     Here's some two-paragraph example
-   *   text.  It starts off innocuously
-   *   enough but quickly turns bizarre.
-   *     The author inserts a cornucopia
-   *   of words to guard against confused
-   *   references.
-   *
-   * Now take an iterator it pointed to the start of "bizarre."
-   *  it.IsAtFinalElement(RIL_PARA, RIL_SYMBOL) = false
-   *  it.IsAtFinalElement(RIL_PARA, RIL_WORD) = true
-   *  it.IsAtFinalElement(RIL_BLOCK, RIL_WORD) = false
-   */
-  virtual bool IsAtFinalElement(PageIteratorLevel level,
-                                PageIteratorLevel element) const;
-
-  /**
-   * Returns whether this iterator is positioned
-   *   before other:   -1
-   *   equal to other:  0
-   *   after other:     1
-   */
-  int Cmp(const PageIterator &other) const;
-
-  // ============= Accessing data ==============.
-  // Coordinate system:
-  // Integer coordinates are at the cracks between the pixels.
-  // The top-left corner of the top-left pixel in the image is at (0,0).
-  // The bottom-right corner of the bottom-right pixel in the image is at
-  // (width, height).
-  // Every bounding box goes from the top-left of the top-left contained
-  // pixel to the bottom-right of the bottom-right contained pixel, so
-  // the bounding box of the single top-left pixel in the image is:
-  // (0,0)->(1,1).
-  // If an image rectangle has been set in the API, then returned coordinates
-  // relate to the original (full) image, rather than the rectangle.
-
-  /**
-   * Controls what to include in a bounding box. Bounding boxes of all levels
-   * between RIL_WORD and RIL_BLOCK can include or exclude potential diacritics.
-   * Between layout analysis and recognition, it isn't known where all
-   * diacritics belong, so this control is used to include or exclude some
-   * diacritics that are above or below the main body of the word. In most cases
-   * where the placement is obvious, and after recognition, it doesn't make as
-   * much difference, as the diacritics will already be included in the word.
-   */
-  void SetBoundingBoxComponents(bool include_upper_dots,
-                                bool include_lower_dots) {
-    include_upper_dots_ = include_upper_dots;
-    include_lower_dots_ = include_lower_dots;
-  }
-
-  /**
-   * Returns the bounding rectangle of the current object at the given level.
-   * See comment on coordinate system above.
-   * Returns false if there is no such object at the current position.
-   * The returned bounding box is guaranteed to match the size and position
-   * of the image returned by GetBinaryImage, but may clip foreground pixels
-   * from a grey image. The padding argument to GetImage can be used to expand
-   * the image to include more foreground pixels. See GetImage below.
-   */
-  bool BoundingBox(PageIteratorLevel level, int *left, int *top, int *right,
-                   int *bottom) const;
-  bool BoundingBox(PageIteratorLevel level, int padding, int *left, int *top,
-                   int *right, int *bottom) const;
-  /**
-   * Returns the bounding rectangle of the object in a coordinate system of the
-   * working image rectangle having its origin at (rect_left_, rect_top_) with
-   * respect to the original image and is scaled by a factor scale_.
-   */
-  bool BoundingBoxInternal(PageIteratorLevel level, int *left, int *top,
-                           int *right, int *bottom) const;
-
-  /** Returns whether there is no object of a given level. */
-  bool Empty(PageIteratorLevel level) const;
-
-  /**
-   * Returns the type of the current block.
-   * See tesseract/publictypes.h for PolyBlockType.
-   */
-  PolyBlockType BlockType() const;
-
-  /**
-   * Returns the polygon outline of the current block. The returned Pta must
-   * be ptaDestroy-ed after use. Note that the returned Pta lists the vertices
-   * of the polygon, and the last edge is the line segment between the last
-   * point and the first point. nullptr will be returned if the iterator is
-   * at the end of the document or layout analysis was not used.
-   */
-  Pta *BlockPolygon() const;
-
-  /**
-   * Returns a binary image of the current object at the given level.
-   * The position and size match the return from BoundingBoxInternal, and so
-   * this could be upscaled with respect to the original input image.
-   * Use pixDestroy to delete the image after use.
-   */
-  Pix *GetBinaryImage(PageIteratorLevel level) const;
-
-  /**
-   * Returns an image of the current object at the given level in greyscale
-   * if available in the input. To guarantee a binary image use GetBinaryImage.
-   * NOTE that in order to give the best possible image, the bounds are
-   * expanded slightly over the binary connected component, by the supplied
-   * padding, so the top-left position of the returned image is returned
-   * in (left,top). These will most likely not match the coordinates
-   * returned by BoundingBox.
-   * If you do not supply an original image, you will get a binary one.
-   * Use pixDestroy to delete the image after use.
-   */
-  Pix *GetImage(PageIteratorLevel level, int padding, Pix *original_img,
-                int *left, int *top) const;
-
-  /**
-   * Returns the baseline of the current object at the given level.
-   * The baseline is the line that passes through (x1, y1) and (x2, y2).
-   * WARNING: with vertical text, baselines may be vertical!
-   * Returns false if there is no baseline at the current position.
-   */
-  bool Baseline(PageIteratorLevel level, int *x1, int *y1, int *x2,
-                int *y2) const;
-
-  // Returns the attributes of the current row.
-  void RowAttributes(float *row_height, float *descenders,
-                     float *ascenders) const;
-
-  /**
-   * Returns orientation for the block the iterator points to.
-   *   orientation, writing_direction, textline_order: see publictypes.h
-   *   deskew_angle: after rotating the block so the text orientation is
-   *                 upright, how many radians does one have to rotate the
-   *                 block anti-clockwise for it to be level?
-   *                   -Pi/4 <= deskew_angle <= Pi/4
-   */
-  void Orientation(tesseract::Orientation *orientation,
-                   tesseract::WritingDirection *writing_direction,
-                   tesseract::TextlineOrder *textline_order,
-                   float *deskew_angle) const;
-
-  /**
-   * Returns information about the current paragraph, if available.
-   *
-   *   justification -
-   *     LEFT if ragged right, or fully justified and script is left-to-right.
-   *     RIGHT if ragged left, or fully justified and script is right-to-left.
-   *     unknown if it looks like source code or we have very few lines.
-   *   is_list_item -
-   *     true if we believe this is a member of an ordered or unordered list.
-   *   is_crown -
-   *     true if the first line of the paragraph is aligned with the other
-   *     lines of the paragraph even though subsequent paragraphs have first
-   *     line indents.  This typically indicates that this is the continuation
-   *     of a previous paragraph or that it is the very first paragraph in
-   *     the chapter.
-   *   first_line_indent -
-   *     For LEFT aligned paragraphs, the first text line of paragraphs of
-   *     this kind are indented this many pixels from the left edge of the
-   *     rest of the paragraph.
-   *     For RIGHT aligned paragraphs, the first text line of paragraphs of
-   *     this kind are indented this many pixels from the right edge of the
-   *     rest of the paragraph.
-   *     NOTE 1: This value may be negative.
-   *     NOTE 2: if *is_crown == true, the first line of this paragraph is
-   *             actually flush, and first_line_indent is set to the "common"
-   *             first_line_indent for subsequent paragraphs in this block
-   *             of text.
-   */
-  void ParagraphInfo(tesseract::ParagraphJustification *justification,
-                     bool *is_list_item, bool *is_crown,
-                     int *first_line_indent) const;
-
-  // If the current WERD_RES (it_->word()) is not nullptr, sets the BlamerBundle
-  // of the current word to the given pointer (takes ownership of the pointer)
-  // and returns true.
-  // Can only be used when iterating on the word level.
-  bool SetWordBlamerBundle(BlamerBundle *blamer_bundle);
-
-protected:
-  /**
-   * Sets up the internal data for iterating the blobs of a new word, then
-   * moves the iterator to the given offset.
-   */
-  void BeginWord(int offset);
-
-  /** Pointer to the page_res owned by the API. */
-  PAGE_RES *page_res_;
-  /** Pointer to the Tesseract object owned by the API. */
-  Tesseract *tesseract_;
-  /**
-   * The iterator to the page_res_. Owned by this ResultIterator.
-   * A pointer just to avoid dragging in Tesseract includes.
-   */
-  PAGE_RES_IT *it_;
-  /**
-   * The current input WERD being iterated. If there is an output from OCR,
-   * then word_ is nullptr. Owned by the API
-   */
-  WERD *word_;
-  /** The length of the current word_. */
-  int word_length_;
-  /** The current blob index within the word. */
-  int blob_index_;
-  /**
-   * Iterator to the blobs within the word. If nullptr, then we are iterating
-   * OCR results in the box_word.
-   * Owned by this ResultIterator.
-   */
-  C_BLOB_IT *cblob_it_;
-  /** Control over what to include in bounding boxes. */
-  bool include_upper_dots_;
-  bool include_lower_dots_;
-  /** Parameters saved from the Thresholder. Needed to rebuild coordinates.*/
-  int scale_;
-  int scaled_yres_;
-  int rect_left_;
-  int rect_top_;
-  int rect_width_;
-  int rect_height_;
-};
-
-} // namespace tesseract.
-
-#endif // TESSERACT_CCMAIN_PAGEITERATOR_H_
--- a/third_party/ocr/tesseract-ocr/kylin/loongarch64/include/tesseract/publictypes.h
+++ b/third_party/ocr/tesseract-ocr/kylin/loongarch64/include/tesseract/publictypes.h
@ -1,281 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// File:        publictypes.h
-// Description: Types used in both the API and internally
-// Author:      Ray Smith
-//
-// (C) Copyright 2010, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef TESSERACT_CCSTRUCT_PUBLICTYPES_H_
-#define TESSERACT_CCSTRUCT_PUBLICTYPES_H_
-
-namespace tesseract {
-
-// This file contains types that are used both by the API and internally
-// to Tesseract. In order to decouple the API from Tesseract and prevent cyclic
-// dependencies, THIS FILE SHOULD NOT DEPEND ON ANY OTHER PART OF TESSERACT.
-// Restated: It is OK for low-level Tesseract files to include publictypes.h,
-// but not for the low-level tesseract code to include top-level API code.
-// This file should not use other Tesseract types, as that would drag
-// their includes into the API-level.
-
-/** Number of printers' points in an inch. The unit of the pointsize return. */
-constexpr int kPointsPerInch = 72;
-/**
- * Minimum believable resolution. Used as a default if there is no other
- * information, as it is safer to under-estimate than over-estimate.
- */
-constexpr int kMinCredibleResolution = 70;
-/** Maximum believable resolution.  */
-constexpr int kMaxCredibleResolution = 2400;
-/**
- * Ratio between median blob size and likely resolution. Used to estimate
- * resolution when none is provided. This is basically 1/usual text size in
- * inches.  */
-constexpr int kResolutionEstimationFactor = 10;
-
-/**
- * Possible types for a POLY_BLOCK or ColPartition.
- * Must be kept in sync with kPBColors in polyblk.cpp and PTIs*Type functions
- * below, as well as kPolyBlockNames in layout_test.cc.
- * Used extensively by ColPartition, and POLY_BLOCK.
- */
-enum PolyBlockType {
-  PT_UNKNOWN,         // Type is not yet known. Keep as the first element.
-  PT_FLOWING_TEXT,    // Text that lives inside a column.
-  PT_HEADING_TEXT,    // Text that spans more than one column.
-  PT_PULLOUT_TEXT,    // Text that is in a cross-column pull-out region.
-  PT_EQUATION,        // Partition belonging to an equation region.
-  PT_INLINE_EQUATION, // Partition has inline equation.
-  PT_TABLE,           // Partition belonging to a table region.
-  PT_VERTICAL_TEXT,   // Text-line runs vertically.
-  PT_CAPTION_TEXT,    // Text that belongs to an image.
-  PT_FLOWING_IMAGE,   // Image that lives inside a column.
-  PT_HEADING_IMAGE,   // Image that spans more than one column.
-  PT_PULLOUT_IMAGE,   // Image that is in a cross-column pull-out region.
-  PT_HORZ_LINE,       // Horizontal Line.
-  PT_VERT_LINE,       // Vertical Line.
-  PT_NOISE,           // Lies outside of any column.
-  PT_COUNT
-};
-
-/** Returns true if PolyBlockType is of horizontal line type */
-inline bool PTIsLineType(PolyBlockType type) {
-  return type == PT_HORZ_LINE || type == PT_VERT_LINE;
-}
-/** Returns true if PolyBlockType is of image type */
-inline bool PTIsImageType(PolyBlockType type) {
-  return type == PT_FLOWING_IMAGE || type == PT_HEADING_IMAGE ||
-         type == PT_PULLOUT_IMAGE;
-}
-/** Returns true if PolyBlockType is of text type */
-inline bool PTIsTextType(PolyBlockType type) {
-  return type == PT_FLOWING_TEXT || type == PT_HEADING_TEXT ||
-         type == PT_PULLOUT_TEXT || type == PT_TABLE ||
-         type == PT_VERTICAL_TEXT || type == PT_CAPTION_TEXT ||
-         type == PT_INLINE_EQUATION;
-}
-// Returns true if PolyBlockType is of pullout(inter-column) type
-inline bool PTIsPulloutType(PolyBlockType type) {
-  return type == PT_PULLOUT_IMAGE || type == PT_PULLOUT_TEXT;
-}
-
-/**
- *  +------------------+  Orientation Example:
- *  | 1 Aaaa Aaaa Aaaa |  ====================
- *  | Aaa aa aaa aa    |  To left is a diagram of some (1) English and
- *  | aaaaaa A aa aaa. |  (2) Chinese text and a (3) photo credit.
- *  |                2 |
- *  |   #######  c c C |  Upright Latin characters are represented as A and a.
- *  |   #######  c c c |  '<' represents a latin character rotated
- *  | < #######  c c c |      anti-clockwise 90 degrees.
- *  | < #######  c   c |
- *  | < #######  .   c |  Upright Chinese characters are represented C and c.
- *  | 3 #######      c |
- *  +------------------+  NOTA BENE: enum values here should match goodoc.proto
-
- * If you orient your head so that "up" aligns with Orientation,
- * then the characters will appear "right side up" and readable.
- *
- * In the example above, both the English and Chinese paragraphs are oriented
- * so their "up" is the top of the page (page up).  The photo credit is read
- * with one's head turned leftward ("up" is to page left).
- *
- * The values of this enum match the convention of Tesseract's osdetect.h
-*/
-enum Orientation {
-  ORIENTATION_PAGE_UP = 0,
-  ORIENTATION_PAGE_RIGHT = 1,
-  ORIENTATION_PAGE_DOWN = 2,
-  ORIENTATION_PAGE_LEFT = 3,
-};
-
-/**
- * The grapheme clusters within a line of text are laid out logically
- * in this direction, judged when looking at the text line rotated so that
- * its Orientation is "page up".
- *
- * For English text, the writing direction is left-to-right.  For the
- * Chinese text in the above example, the writing direction is top-to-bottom.
- */
-enum WritingDirection {
-  WRITING_DIRECTION_LEFT_TO_RIGHT = 0,
-  WRITING_DIRECTION_RIGHT_TO_LEFT = 1,
-  WRITING_DIRECTION_TOP_TO_BOTTOM = 2,
-};
-
-/**
- * The text lines are read in the given sequence.
- *
- * In English, the order is top-to-bottom.
- * In Chinese, vertical text lines are read right-to-left.  Mongolian is
- * written in vertical columns top to bottom like Chinese, but the lines
- * order left-to right.
- *
- * Note that only some combinations make sense.  For example,
- * WRITING_DIRECTION_LEFT_TO_RIGHT implies TEXTLINE_ORDER_TOP_TO_BOTTOM
- */
-enum TextlineOrder {
-  TEXTLINE_ORDER_LEFT_TO_RIGHT = 0,
-  TEXTLINE_ORDER_RIGHT_TO_LEFT = 1,
-  TEXTLINE_ORDER_TOP_TO_BOTTOM = 2,
-};
-
-/**
- * Possible modes for page layout analysis. These *must* be kept in order
- * of decreasing amount of layout analysis to be done, except for OSD_ONLY,
- * so that the inequality test macros below work.
- */
-enum PageSegMode {
-  PSM_OSD_ONLY = 0,      ///< Orientation and script detection only.
-  PSM_AUTO_OSD = 1,      ///< Automatic page segmentation with orientation and
-                         ///< script detection. (OSD)
-  PSM_AUTO_ONLY = 2,     ///< Automatic page segmentation, but no OSD, or OCR.
-  PSM_AUTO = 3,          ///< Fully automatic page segmentation, but no OSD.
-  PSM_SINGLE_COLUMN = 4, ///< Assume a single column of text of variable sizes.
-  PSM_SINGLE_BLOCK_VERT_TEXT = 5, ///< Assume a single uniform block of
-                                  ///< vertically aligned text.
-  PSM_SINGLE_BLOCK = 6, ///< Assume a single uniform block of text. (Default.)
-  PSM_SINGLE_LINE = 7,  ///< Treat the image as a single text line.
-  PSM_SINGLE_WORD = 8,  ///< Treat the image as a single word.
-  PSM_CIRCLE_WORD = 9,  ///< Treat the image as a single word in a circle.
-  PSM_SINGLE_CHAR = 10, ///< Treat the image as a single character.
-  PSM_SPARSE_TEXT =
-      11, ///< Find as much text as possible in no particular order.
-  PSM_SPARSE_TEXT_OSD = 12, ///< Sparse text with orientation and script det.
-  PSM_RAW_LINE = 13, ///< Treat the image as a single text line, bypassing
-                     ///< hacks that are Tesseract-specific.
-
-  PSM_COUNT ///< Number of enum entries.
-};
-
-/**
- * Inline functions that act on a PageSegMode to determine whether components of
- * layout analysis are enabled.
- * *Depend critically on the order of elements of PageSegMode.*
- * NOTE that arg is an int for compatibility with INT_PARAM.
- */
-inline bool PSM_OSD_ENABLED(int pageseg_mode) {
-  return pageseg_mode <= PSM_AUTO_OSD || pageseg_mode == PSM_SPARSE_TEXT_OSD;
-}
-inline bool PSM_ORIENTATION_ENABLED(int pageseg_mode) {
-  return pageseg_mode <= PSM_AUTO || pageseg_mode == PSM_SPARSE_TEXT_OSD;
-}
-inline bool PSM_COL_FIND_ENABLED(int pageseg_mode) {
-  return pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_AUTO;
-}
-inline bool PSM_SPARSE(int pageseg_mode) {
-  return pageseg_mode == PSM_SPARSE_TEXT || pageseg_mode == PSM_SPARSE_TEXT_OSD;
-}
-inline bool PSM_BLOCK_FIND_ENABLED(int pageseg_mode) {
-  return pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_SINGLE_COLUMN;
-}
-inline bool PSM_LINE_FIND_ENABLED(int pageseg_mode) {
-  return pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_SINGLE_BLOCK;
-}
-inline bool PSM_WORD_FIND_ENABLED(int pageseg_mode) {
-  return (pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_SINGLE_LINE) ||
-         pageseg_mode == PSM_SPARSE_TEXT || pageseg_mode == PSM_SPARSE_TEXT_OSD;
-}
-
-/**
- * enum of the elements of the page hierarchy, used in ResultIterator
- * to provide functions that operate on each level without having to
- * have 5x as many functions.
- */
-enum PageIteratorLevel {
-  RIL_BLOCK,    // Block of text/image/separator line.
-  RIL_PARA,     // Paragraph within a block.
-  RIL_TEXTLINE, // Line within a paragraph.
-  RIL_WORD,     // Word within a textline.
-  RIL_SYMBOL    // Symbol/character within a word.
-};
-
-/**
- * JUSTIFICATION_UNKNOWN
- *   The alignment is not clearly one of the other options.  This could happen
- *   for example if there are only one or two lines of text or the text looks
- *   like source code or poetry.
- *
- * NOTA BENE: Fully justified paragraphs (text aligned to both left and right
- *    margins) are marked by Tesseract with JUSTIFICATION_LEFT if their text
- *    is written with a left-to-right script and with JUSTIFICATION_RIGHT if
- *    their text is written in a right-to-left script.
- *
- * Interpretation for text read in vertical lines:
- *   "Left" is wherever the starting reading position is.
- *
- * JUSTIFICATION_LEFT
- *   Each line, except possibly the first, is flush to the same left tab stop.
- *
- * JUSTIFICATION_CENTER
- *   The text lines of the paragraph are centered about a line going
- *   down through their middle of the text lines.
- *
- * JUSTIFICATION_RIGHT
- *   Each line, except possibly the first, is flush to the same right tab stop.
- */
-enum ParagraphJustification {
-  JUSTIFICATION_UNKNOWN,
-  JUSTIFICATION_LEFT,
-  JUSTIFICATION_CENTER,
-  JUSTIFICATION_RIGHT,
-};
-
-/**
- * When Tesseract/Cube is initialized we can choose to instantiate/load/run
- * only the Tesseract part, only the Cube part or both along with the combiner.
- * The preference of which engine to use is stored in tessedit_ocr_engine_mode.
- *
- * ATTENTION: When modifying this enum, please make sure to make the
- * appropriate changes to all the enums mirroring it (e.g. OCREngine in
- * cityblock/workflow/detection/detection_storage.proto). Such enums will
- * mention the connection to OcrEngineMode in the comments.
- */
-enum OcrEngineMode {
-  OEM_TESSERACT_ONLY,          // Run Tesseract only - fastest; deprecated
-  OEM_LSTM_ONLY,               // Run just the LSTM line recognizer.
-  OEM_TESSERACT_LSTM_COMBINED, // Run the LSTM recognizer, but allow fallback
-                               // to Tesseract when things get difficult.
-                               // deprecated
-  OEM_DEFAULT,                 // Specify this mode when calling init_*(),
-                               // to indicate that any of the above modes
-                               // should be automatically inferred from the
-                               // variables in the language-specific config,
-                               // command-line configs, or if not specified
-                               // in any of the above should be set to the
-                               // default OEM_TESSERACT_ONLY.
-  OEM_COUNT                    // Number of OEMs
-};
-
-} // namespace tesseract.
-
-#endif // TESSERACT_CCSTRUCT_PUBLICTYPES_H_
--- a/third_party/ocr/tesseract-ocr/kylin/loongarch64/include/tesseract/renderer.h
+++ b/third_party/ocr/tesseract-ocr/kylin/loongarch64/include/tesseract/renderer.h
@ -1,311 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// File:        renderer.h
-// Description: Rendering interface to inject into TessBaseAPI
-//
-// (C) Copyright 2011, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef TESSERACT_API_RENDERER_H_
-#define TESSERACT_API_RENDERER_H_
-
-#include "export.h"
-
-// To avoid collision with other typenames include the ABSOLUTE MINIMUM
-// complexity of includes here. Use forward declarations wherever possible
-// and hide includes of complex types in baseapi.cpp.
-#include <cstdint>
-#include <string> // for std::string
-#include <vector> // for std::vector
-
-struct Pix;
-
-namespace tesseract {
-
-class TessBaseAPI;
-
-/**
- * Interface for rendering tesseract results into a document, such as text,
- * HOCR or pdf. This class is abstract. Specific classes handle individual
- * formats. This interface is then used to inject the renderer class into
- * tesseract when processing images.
- *
- * For simplicity implementing this with tesseract version 3.01,
- * the renderer contains document state that is cleared from document
- * to document just as the TessBaseAPI is. This way the base API can just
- * delegate its rendering functionality to injected renderers, and the
- * renderers can manage the associated state needed for the specific formats
- * in addition to the heuristics for producing it.
- */
-class TESS_API TessResultRenderer {
-public:
-  virtual ~TessResultRenderer();
-
-  // Takes ownership of pointer so must be new'd instance.
-  // Renderers aren't ordered, but appends the sequences of next parameter
-  // and existing next(). The renderers should be unique across both lists.
-  void insert(TessResultRenderer *next);
-
-  // Returns the next renderer or nullptr.
-  TessResultRenderer *next() {
-    return next_;
-  }
-
-  /**
-   * Starts a new document with the given title.
-   * This clears the contents of the output data.
-   * Title should use UTF-8 encoding.
-   */
-  bool BeginDocument(const char *title);
-
-  /**
-   * Adds the recognized text from the source image to the current document.
-   * Invalid if BeginDocument not yet called.
-   *
-   * Note that this API is a bit weird but is designed to fit into the
-   * current TessBaseAPI implementation where the api has lots of state
-   * information that we might want to add in.
-   */
-  bool AddImage(TessBaseAPI *api);
-
-  /**
-   * Finishes the document and finalizes the output data
-   * Invalid if BeginDocument not yet called.
-   */
-  bool EndDocument();
-
-  const char *file_extension() const {
-    return file_extension_;
-  }
-  const char *title() const {
-    return title_.c_str();
-  }
-
-  // Is everything fine? Otherwise something went wrong.
-  bool happy() const {
-    return happy_;
-  }
-
-  /**
-   * Returns the index of the last image given to AddImage
-   * (i.e. images are incremented whether the image succeeded or not)
-   *
-   * This is always defined. It means either the number of the
-   * current image, the last image ended, or in the completed document
-   * depending on when in the document lifecycle you are looking at it.
-   * Will return -1 if a document was never started.
-   */
-  int imagenum() const {
-    return imagenum_;
-  }
-
-protected:
-  /**
-   * Called by concrete classes.
-   *
-   * outputbase is the name of the output file excluding
-   * extension. For example, "/path/to/chocolate-chip-cookie-recipe"
-   *
-   * extension indicates the file extension to be used for output
-   * files. For example "pdf" will produce a .pdf file, and "hocr"
-   * will produce .hocr files.
-   */
-  TessResultRenderer(const char *outputbase, const char *extension);
-
-  // Hook for specialized handling in BeginDocument()
-  virtual bool BeginDocumentHandler();
-
-  // This must be overridden to render the OCR'd results
-  virtual bool AddImageHandler(TessBaseAPI *api) = 0;
-
-  // Hook for specialized handling in EndDocument()
-  virtual bool EndDocumentHandler();
-
-  // Renderers can call this to append '\0' terminated strings into
-  // the output string returned by GetOutput.
-  // This method will grow the output buffer if needed.
-  void AppendString(const char *s);
-
-  // Renderers can call this to append binary byte sequences into
-  // the output string returned by GetOutput. Note that s is not necessarily
-  // '\0' terminated (and can contain '\0' within it).
-  // This method will grow the output buffer if needed.
-  void AppendData(const char *s, int len);
-
-private:
-  TessResultRenderer *next_;   // Can link multiple renderers together
-  FILE *fout_;                 // output file pointer
-  const char *file_extension_; // standard extension for generated output
-  std::string title_;          // title of document being rendered
-  int imagenum_;               // index of last image added
-  bool happy_;                 // I get grumpy when the disk fills up, etc.
-};
-
-/**
- * Renders tesseract output into a plain UTF-8 text string
- */
-class TESS_API TessTextRenderer : public TessResultRenderer {
-public:
-  explicit TessTextRenderer(const char *outputbase);
-
-protected:
-  bool AddImageHandler(TessBaseAPI *api) override;
-};
-
-/**
- * Renders tesseract output into an hocr text string
- */
-class TESS_API TessHOcrRenderer : public TessResultRenderer {
-public:
-  explicit TessHOcrRenderer(const char *outputbase, bool font_info);
-  explicit TessHOcrRenderer(const char *outputbase);
-
-protected:
-  bool BeginDocumentHandler() override;
-  bool AddImageHandler(TessBaseAPI *api) override;
-  bool EndDocumentHandler() override;
-
-private:
-  bool font_info_; // whether to print font information
-};
-
-/**
- * Renders tesseract output into an alto text string
- */
-class TESS_API TessAltoRenderer : public TessResultRenderer {
-public:
-  explicit TessAltoRenderer(const char *outputbase);
-
-protected:
-  bool BeginDocumentHandler() override;
-  bool AddImageHandler(TessBaseAPI *api) override;
-  bool EndDocumentHandler() override;
-
-private:
-  bool begin_document;
-};
-
-/**
- * Renders Tesseract output into a TSV string
- */
-class TESS_API TessTsvRenderer : public TessResultRenderer {
-public:
-  explicit TessTsvRenderer(const char *outputbase, bool font_info);
-  explicit TessTsvRenderer(const char *outputbase);
-
-protected:
-  bool BeginDocumentHandler() override;
-  bool AddImageHandler(TessBaseAPI *api) override;
-  bool EndDocumentHandler() override;
-
-private:
-  bool font_info_; // whether to print font information
-};
-
-/**
- * Renders tesseract output into searchable PDF
- */
-class TESS_API TessPDFRenderer : public TessResultRenderer {
-public:
-  // datadir is the location of the TESSDATA. We need it because
-  // we load a custom PDF font from this location.
-  TessPDFRenderer(const char *outputbase, const char *datadir,
-                  bool textonly = false);
-
-protected:
-  bool BeginDocumentHandler() override;
-  bool AddImageHandler(TessBaseAPI *api) override;
-  bool EndDocumentHandler() override;
-
-private:
-  // We don't want to have every image in memory at once,
-  // so we store some metadata as we go along producing
-  // PDFs one page at a time. At the end, that metadata is
-  // used to make everything that isn't easily handled in a
-  // streaming fashion.
-  long int obj_;                  // counter for PDF objects
-  std::vector<uint64_t> offsets_; // offset of every PDF object in bytes
-  std::vector<long int> pages_;   // object number for every /Page object
-  std::string datadir_;           // where to find the custom font
-  bool textonly_;                 // skip images if set
-  // Bookkeeping only. DIY = Do It Yourself.
-  void AppendPDFObjectDIY(size_t objectsize);
-  // Bookkeeping + emit data.
-  void AppendPDFObject(const char *data);
-  // Create the /Contents object for an entire page.
-  char *GetPDFTextObjects(TessBaseAPI *api, double width, double height);
-  // Turn an image into a PDF object. Only transcode if we have to.
-  static bool imageToPDFObj(Pix *pix, const char *filename, long int objnum,
-                            char **pdf_object, long int *pdf_object_size,
-                            int jpg_quality);
-};
-
-/**
- * Renders tesseract output into a plain UTF-8 text string
- */
-class TESS_API TessUnlvRenderer : public TessResultRenderer {
-public:
-  explicit TessUnlvRenderer(const char *outputbase);
-
-protected:
-  bool AddImageHandler(TessBaseAPI *api) override;
-};
-
-/**
- * Renders tesseract output into a plain UTF-8 text string for LSTMBox
- */
-class TESS_API TessLSTMBoxRenderer : public TessResultRenderer {
-public:
-  explicit TessLSTMBoxRenderer(const char *outputbase);
-
-protected:
-  bool AddImageHandler(TessBaseAPI *api) override;
-};
-
-/**
- * Renders tesseract output into a plain UTF-8 text string
- */
-class TESS_API TessBoxTextRenderer : public TessResultRenderer {
-public:
-  explicit TessBoxTextRenderer(const char *outputbase);
-
-protected:
-  bool AddImageHandler(TessBaseAPI *api) override;
-};
-
-/**
- * Renders tesseract output into a plain UTF-8 text string in WordStr format
- */
-class TESS_API TessWordStrBoxRenderer : public TessResultRenderer {
-public:
-  explicit TessWordStrBoxRenderer(const char *outputbase);
-
-protected:
-  bool AddImageHandler(TessBaseAPI *api) override;
-};
-
-#ifndef DISABLED_LEGACY_ENGINE
-
-/**
- * Renders tesseract output into an osd text string
- */
-class TESS_API TessOsdRenderer : public TessResultRenderer {
-public:
-  explicit TessOsdRenderer(const char *outputbase);
-
-protected:
-  bool AddImageHandler(TessBaseAPI *api) override;
-};
-
-#endif // ndef DISABLED_LEGACY_ENGINE
-
-} // namespace tesseract.
-
-#endif // TESSERACT_API_RENDERER_H_
--- a/third_party/ocr/tesseract-ocr/kylin/loongarch64/include/tesseract/resultiterator.h
+++ b/third_party/ocr/tesseract-ocr/kylin/loongarch64/include/tesseract/resultiterator.h
@ -1,250 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// File:        resultiterator.h
-// Description: Iterator for tesseract results that is capable of
-//              iterating in proper reading order over Bi Directional
-//              (e.g. mixed Hebrew and English) text.
-// Author:      David Eger
-//
-// (C) Copyright 2011, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef TESSERACT_CCMAIN_RESULT_ITERATOR_H_
-#define TESSERACT_CCMAIN_RESULT_ITERATOR_H_
-
-#include "export.h"            // for TESS_API, TESS_LOCAL
-#include "ltrresultiterator.h" // for LTRResultIterator
-#include "publictypes.h"       // for PageIteratorLevel
-#include "unichar.h"           // for StrongScriptDirection
-
-#include <set>    // for std::pair
-#include <vector> // for std::vector
-
-namespace tesseract {
-
-class TESS_API ResultIterator : public LTRResultIterator {
-public:
-  static ResultIterator *StartOfParagraph(const LTRResultIterator &resit);
-
-  /**
-   * ResultIterator is copy constructible!
-   * The default copy constructor works just fine for us.
-   */
-  ~ResultIterator() override = default;
-
-  // ============= Moving around within the page ============.
-  /**
-   * Moves the iterator to point to the start of the page to begin
-   * an iteration.
-   */
-  void Begin() override;
-
-  /**
-   * Moves to the start of the next object at the given level in the
-   * page hierarchy in the appropriate reading order and returns false if
-   * the end of the page was reached.
-   * NOTE that RIL_SYMBOL will skip non-text blocks, but all other
-   * PageIteratorLevel level values will visit each non-text block once.
-   * Think of non text blocks as containing a single para, with a single line,
-   * with a single imaginary word.
-   * Calls to Next with different levels may be freely intermixed.
-   * This function iterates words in right-to-left scripts correctly, if
-   * the appropriate language has been loaded into Tesseract.
-   */
-  bool Next(PageIteratorLevel level) override;
-
-  /**
-   * IsAtBeginningOf() returns whether we're at the logical beginning of the
-   * given level.  (as opposed to ResultIterator's left-to-right top-to-bottom
-   * order).  Otherwise, this acts the same as PageIterator::IsAtBeginningOf().
-   * For a full description, see pageiterator.h
-   */
-  bool IsAtBeginningOf(PageIteratorLevel level) const override;
-
-  /**
-   * Implement PageIterator's IsAtFinalElement correctly in a BiDi context.
-   * For instance, IsAtFinalElement(RIL_PARA, RIL_WORD) returns whether we
-   * point at the last word in a paragraph.  See PageIterator for full comment.
-   */
-  bool IsAtFinalElement(PageIteratorLevel level,
-                        PageIteratorLevel element) const override;
-
-  // ============= Functions that refer to words only ============.
-  // Returns the number of blanks before the current word.
-  int BlanksBeforeWord() const;
-
-  // ============= Accessing data ==============.
-
-  /**
-   * Returns the null terminated UTF-8 encoded text string for the current
-   * object at the given level. Use delete [] to free after use.
-   */
-  virtual char *GetUTF8Text(PageIteratorLevel level) const;
-
-  /**
-   * Returns the LSTM choices for every LSTM timestep for the current word.
-   */
-  virtual std::vector<std::vector<std::vector<std::pair<const char *, float>>>>
-      *GetRawLSTMTimesteps() const;
-  virtual std::vector<std::vector<std::pair<const char *, float>>>
-      *GetBestLSTMSymbolChoices() const;
-
-  /**
-   * Return whether the current paragraph's dominant reading direction
-   * is left-to-right (as opposed to right-to-left).
-   */
-  bool ParagraphIsLtr() const;
-
-  // ============= Exposed only for testing =============.
-
-  /**
-   * Yields the reading order as a sequence of indices and (optional)
-   * meta-marks for a set of words (given left-to-right).
-   * The meta marks are passed as negative values:
-   *   kMinorRunStart  Start of minor direction text.
-   *   kMinorRunEnd    End of minor direction text.
-   *   kComplexWord    The next indexed word contains both left-to-right and
-   *                    right-to-left characters and was treated as neutral.
-   *
-   * For example, suppose we have five words in a text line,
-   * indexed [0,1,2,3,4] from the leftmost side of the text line.
-   * The following are all believable reading_orders:
-   *
-   * Left-to-Right (in ltr paragraph):
-   *     { 0, 1, 2, 3, 4 }
-   * Left-to-Right (in rtl paragraph):
-   *     { kMinorRunStart, 0, 1, 2, 3, 4, kMinorRunEnd }
-   * Right-to-Left (in rtl paragraph):
-   *     { 4, 3, 2, 1, 0 }
-   * Left-to-Right except for an RTL phrase in words 2, 3 in an ltr paragraph:
-   *     { 0, 1, kMinorRunStart, 3, 2, kMinorRunEnd, 4 }
-   */
-  static void CalculateTextlineOrder(
-      bool paragraph_is_ltr,
-      const std::vector<StrongScriptDirection> &word_dirs,
-      std::vector<int> *reading_order);
-
-  static const int kMinorRunStart;
-  static const int kMinorRunEnd;
-  static const int kComplexWord;
-
-protected:
-  /**
-   * We presume the data associated with the given iterator will outlive us.
-   * NB: This is private because it does something that is non-obvious:
-   *   it resets to the beginning of the paragraph instead of staying wherever
-   *   resit might have pointed.
-   */
-  explicit ResultIterator(const LTRResultIterator &resit);
-
-private:
-  /**
-   * Calculates the current paragraph's dominant writing direction.
-   * Typically, members should use current_paragraph_ltr_ instead.
-   */
-  bool CurrentParagraphIsLtr() const;
-
-  /**
-   * Returns word indices as measured from resit->RestartRow() = index 0
-   * for the reading order of words within a textline given an iterator
-   * into the middle of the text line.
-   * In addition to non-negative word indices, the following negative values
-   * may be inserted:
-   *   kMinorRunStart  Start of minor direction text.
-   *   kMinorRunEnd    End of minor direction text.
-   *   kComplexWord    The previous word contains both left-to-right and
-   *                   right-to-left characters and was treated as neutral.
-   */
-  void CalculateTextlineOrder(bool paragraph_is_ltr,
-                              const LTRResultIterator &resit,
-                              std::vector<int> *indices) const;
-  /** Same as above, but the caller's ssd gets filled in if ssd != nullptr. */
-  void CalculateTextlineOrder(bool paragraph_is_ltr,
-                              const LTRResultIterator &resit,
-                              std::vector<StrongScriptDirection> *ssd,
-                              std::vector<int> *indices) const;
-
-  /**
-   * What is the index of the current word in a strict left-to-right reading
-   * of the row?
-   */
-  int LTRWordIndex() const;
-
-  /**
-   * Given an iterator pointing at a word, returns the logical reading order
-   * of blob indices for the word.
-   */
-  void CalculateBlobOrder(std::vector<int> *blob_indices) const;
-
-  /** Precondition: current_paragraph_is_ltr_ is set. */
-  void MoveToLogicalStartOfTextline();
-
-  /**
-   * Precondition: current_paragraph_is_ltr_ and in_minor_direction_
-   * are set.
-   */
-  void MoveToLogicalStartOfWord();
-
-  /** Are we pointing at the final (reading order) symbol of the word? */
-  bool IsAtFinalSymbolOfWord() const;
-
-  /** Are we pointing at the first (reading order) symbol of the word? */
-  bool IsAtFirstSymbolOfWord() const;
-
-  /**
-   * Append any extra marks that should be appended to this word when printed.
-   * Mostly, these are Unicode BiDi control characters.
-   */
-  void AppendSuffixMarks(std::string *text) const;
-
-  /** Appends the current word in reading order to the given buffer.*/
-  void AppendUTF8WordText(std::string *text) const;
-
-  /**
-   * Appends the text of the current text line, *assuming this iterator is
-   * positioned at the beginning of the text line*  This function
-   * updates the iterator to point to the first position past the text line.
-   * Each textline is terminated in a single newline character.
-   * If the textline ends a paragraph, it gets a second terminal newline.
-   */
-  void IterateAndAppendUTF8TextlineText(std::string *text);
-
-  /**
-   * Appends the text of the current paragraph in reading order
-   * to the given buffer.
-   * Each textline is terminated in a single newline character, and the
-   * paragraph gets an extra newline at the end.
-   */
-  void AppendUTF8ParagraphText(std::string *text) const;
-
-  /** Returns whether the bidi_debug flag is set to at least min_level. */
-  bool BidiDebug(int min_level) const;
-
-  bool current_paragraph_is_ltr_;
-
-  /**
-   * Is the currently pointed-at character at the beginning of
-   * a minor-direction run?
-   */
-  bool at_beginning_of_minor_run_;
-
-  /** Is the currently pointed-at character in a minor-direction sequence? */
-  bool in_minor_direction_;
-
-  /**
-   * Should detected inter-word spaces be preserved, or "compressed" to a single
-   * space character (default behavior).
-   */
-  bool preserve_interword_spaces_;
-};
-
-} // namespace tesseract.
-
-#endif // TESSERACT_CCMAIN_RESULT_ITERATOR_H_
--- a/third_party/ocr/tesseract-ocr/kylin/loongarch64/include/tesseract/unichar.h
+++ b/third_party/ocr/tesseract-ocr/kylin/loongarch64/include/tesseract/unichar.h
@ -1,174 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// File:        unichar.h
-// Description: Unicode character/ligature class.
-// Author:      Ray Smith
-//
-// (C) Copyright 2006, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef TESSERACT_CCUTIL_UNICHAR_H_
-#define TESSERACT_CCUTIL_UNICHAR_H_
-
-#include "export.h"
-
-#include <memory.h>
-#include <cstring>
-#include <string>
-#include <vector>
-
-namespace tesseract {
-
-// Maximum number of chars (UTF-8 bytes) that can be stored in a UNICHAR. Must
-// be at least 4. Must not exceed 31 without changing the coding of length.
-#define UNICHAR_LEN 30
-
-// A UNICHAR_ID is the unique id of a unichar.
-using UNICHAR_ID = int;
-
-// A constant to indicate an invalid or uninitialized unichar id.
-static const int INVALID_UNICHAR_ID = -1;
-// A special unichar string that corresponds to INVALID_UNICHAR_ID.
-static const char INVALID_UNICHAR[] = "__INVALID_UNICHAR__";
-
-enum StrongScriptDirection { // Summarizes which text directions occur in a text.
-  DIR_NEUTRAL = 0,       // Text contains only neutral characters.
-  DIR_LEFT_TO_RIGHT = 1, // Text contains no Right-to-Left characters.
-  DIR_RIGHT_TO_LEFT = 2, // Text contains no Left-to-Right characters.
-  DIR_MIX = 3,           // Text contains a mixture of left-to-right
-                         // and right-to-left characters.
-};
-
-using char32 = signed int; // Element type used by UNICHAR::UTF8ToUTF32/UTF32ToUTF8.
-
-// The UNICHAR class holds a single classification result. This may be
-// a single Unicode character (stored as between 1 and 4 utf8 bytes) or
-// multiple Unicode characters representing the NFKC expansion of a ligature
-// such as fi, ffl etc. These are also stored as utf8.
-class TESS_API UNICHAR {
-public:
-  UNICHAR() { // Zero-fills the buffer, so utf8_len() reports 0.
-    memset(chars, 0, UNICHAR_LEN);
-  }
-
-  // Construct from a utf8 string. If len<0 then the string is null terminated.
-  // If the string is too long to fit in the UNICHAR then it takes only what
-  // will fit.
-  UNICHAR(const char *utf8_str, int len);
-
-  // Construct from a single UCS4 character.
-  explicit UNICHAR(int unicode);
-
-  // Default copy constructor and operator= are OK.
-
-  // Get the first character as UCS-4.
-  int first_uni() const;
-
-  // Get the length in bytes of the stored UTF8 string.
-  int utf8_len() const {
-    int len = chars[UNICHAR_LEN - 1]; // Last byte doubles as the length field.
-    return len >= 0 && len < UNICHAR_LEN ? len : UNICHAR_LEN;
-  }
-
-  // Get a UTF8 string, but NOT null-terminated; use utf8_len() for its length.
-  const char *utf8() const {
-    return chars;
-  }
-
-  // Get a null-terminated UTF8 string: caller must delete[] it after use.
-  char *utf8_str() const;
-
-  // Get the number of bytes in the first character of the given utf8 string.
-  static int utf8_step(const char *utf8_str);
-
-  // A class to simplify iterating over and accessing elements of a UTF8
-  // string. Note that unlike the UNICHAR class, const_iterator does NOT COPY or
-  // take ownership of the underlying byte array. It also does not permit
-  // modification of the array (as the name suggests).
-  //
-  // Example:
-  //   for (UNICHAR::const_iterator it = UNICHAR::begin(str, str_len);
-  //        it != UNICHAR::end(str, str_len);
-  //        ++it) {
-  //     printf("UCS-4 symbol code = %d\n", *it);
-  //     char buf[5];
-  //     int char_len = it.get_utf8(buf); buf[char_len] = '\0';
-  //     printf("Char = %s\n", buf);
-  //   }
-  class TESS_API const_iterator {
-    using CI = const_iterator;
-
-  public:
-    // Step to the next UTF8 character.
-    // If the current position is at an illegal UTF8 character, then print an
-    // error message and step by one byte. If the current position is at a
-    // null (NUL) byte, don't step past it.
-    const_iterator &operator++();
-
-    // Return the UCS-4 value at the current position.
-    // If the current position is at an illegal UTF8 value, return a single
-    // space character.
-    int operator*() const;
-
-    // Store the UTF-8 encoding of the current codepoint into buf, which must be
-    // at least 4 bytes long. Return the number of bytes written.
-    // If the current position is at an illegal UTF8 value, writes a single
-    // space character and returns 1.
-    // Note that this method does not null-terminate the buffer.
-    int get_utf8(char *buf) const;
-    // Returns the number of bytes of the current codepoint. Returns 1 if the
-    // current position is at an illegal UTF8 value.
-    int utf8_len() const;
-    // Returns true if the UTF-8 encoding at the current position is legal.
-    bool is_legal() const;
-
-    // Return the pointer into the string at the current position.
-    const char *utf8_data() const {
-      return it_;
-    }
-
-    // Iterator equality operators: compare the underlying pointers only.
-    friend bool operator==(const CI &lhs, const CI &rhs) {
-      return lhs.it_ == rhs.it_;
-    }
-    friend bool operator!=(const CI &lhs, const CI &rhs) {
-      return !(lhs == rhs);
-    }
-
-  private:
-    friend class UNICHAR;
-    explicit const_iterator(const char *it) : it_(it) {}
-
-    const char *it_; // Pointer into the string (not owned).
-  };
-
-  // Create a start/end iterator pointing to a string. Note that these methods
-  // are static and do NOT create a copy or take ownership of the underlying
-  // array.
-  static const_iterator begin(const char *utf8_str, int byte_length);
-  static const_iterator end(const char *utf8_str, int byte_length);
-
-  // Converts a utf-8 string to a vector of Unicode code points.
-  // Returns an empty vector if the input contains invalid UTF-8.
-  static std::vector<char32> UTF8ToUTF32(const char *utf8_str);
-  // Converts a vector of Unicode code points to a utf8 string.
-  // Returns an empty string if the input contains an invalid code point.
-  static std::string UTF32ToUTF8(const std::vector<char32> &str32);
-
-private:
-  // A UTF-8 representation of 1 or more Unicode characters.
-  // The last element (chars[UNICHAR_LEN - 1]) is a length if
-  // its value < UNICHAR_LEN, otherwise it is a genuine character.
-  char chars[UNICHAR_LEN]{};
-};
-
-} // namespace tesseract
-
-#endif // TESSERACT_CCUTIL_UNICHAR_H_
--- a/third_party/ocr/tesseract-ocr/kylin/loongarch64/include/tesseract/version.h
+++ b/third_party/ocr/tesseract-ocr/kylin/loongarch64/include/tesseract/version.h
@ -1,34 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// File:        version.h
-// Description: Version information
-//
-// (C) Copyright 2018, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef TESSERACT_API_VERSION_H_
-#define TESSERACT_API_VERSION_H_
-
-// clang-format off
-// NOTE(review): the @...@ tokens below look like unsubstituted template placeholders - confirm this header was generated.
-#define TESSERACT_MAJOR_VERSION @GENERIC_MAJOR_VERSION@
-#define TESSERACT_MINOR_VERSION @GENERIC_MINOR_VERSION@
-#define TESSERACT_MICRO_VERSION @GENERIC_MICRO_VERSION@
-// Packed version number: (major << 16) | (minor << 8) | micro.
-#define TESSERACT_VERSION          \
-  (TESSERACT_MAJOR_VERSION << 16 | \
-   TESSERACT_MINOR_VERSION <<  8 | \
-   TESSERACT_MICRO_VERSION)
-
-#define TESSERACT_VERSION_STR "@PACKAGE_VERSION@"
-
-// clang-format on
-
-#endif // TESSERACT_API_VERSION_H_
--- a/third_party/ocr/tesseract-ocr/kylin/mips64/include/tesseract/baseapi.h
+++ b/third_party/ocr/tesseract-ocr/kylin/mips64/include/tesseract/baseapi.h
@ -1,812 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// File:        baseapi.h
-// Description: Simple API for calling tesseract.
-// Author:      Ray Smith
-//
-// (C) Copyright 2006, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef TESSERACT_API_BASEAPI_H_
-#define TESSERACT_API_BASEAPI_H_
-
-#ifdef HAVE_CONFIG_H
-#  include "config_auto.h" // DISABLED_LEGACY_ENGINE
-#endif
-
-#include "export.h"
-#include "pageiterator.h"
-#include "publictypes.h"
-#include "resultiterator.h"
-#include "unichar.h"
-
-#include "version.h"
-
-#include <cstdio>
-#include <vector> // for std::vector
-
-struct Pix;  // Forward declarations of the image/box types used in the
-struct Pixa; // TessBaseAPI signatures below; the API comments describe
-struct Boxa; // Boxa/Pixa as leptonica-style.
-
-namespace tesseract {
-
-class PAGE_RES;
-class ParagraphModel;
-class BLOCK_LIST;
-class ETEXT_DESC;
-struct OSResults;
-class UNICHARSET;
-
-class Dawg;
-class Dict;
-class EquationDetect;
-class PageIterator;
-class ImageThresholder;
-class LTRResultIterator;
-class ResultIterator;
-class MutableIterator;
-class TessResultRenderer;
-class Tesseract;
-
-// Function pointer type for reading a whole file into a std::vector<char>.
-// Returns false on failure.
-using FileReader = bool (*)(const char *filename, std::vector<char> *data);
-
-using DictFunc = int (Dict::*)(void *, const UNICHARSET &, UNICHAR_ID,
-                               bool) const; // Pointer to a const member function of Dict.
-using ProbabilityInContextFunc = double (Dict::*)(const char *, const char *,
-                                                  int, const char *, int); // Pointer to a non-const member function of Dict.
-
-/**
- * Base class for all tesseract APIs.
- * Specific classes can add ability to work on different inputs or produce
- * different outputs.
- * This class is mostly an interface layer on top of the Tesseract instance
- * class to hide the data types so that users of this class don't have to
- * include any other Tesseract headers.
- */
-class TESS_API TessBaseAPI {
-public:
-  TessBaseAPI();
-  virtual ~TessBaseAPI();
-  // Copy constructor and assignment operator are currently unsupported.
-  TessBaseAPI(TessBaseAPI const &) = delete;
-  TessBaseAPI &operator=(TessBaseAPI const &) = delete;
-
-  /**
-   * Returns the version identifier as a static string. Do not delete.
-   */
-  static const char *Version();
-
-  /**
-   * If compiled with OpenCL AND an available OpenCL
-   * device is deemed faster than serial code, then
-   * "device" is populated with the cl_device_id
-   * and returns sizeof(cl_device_id)
-   * otherwise *device=nullptr and returns 0.
-   */
-  static size_t getOpenCLDevice(void **device);
-
-  /**
-   * Set the name of the input file. Needed for training and
-   * reading a UNLV zone file, and for searchable PDF output.
-   */
-  void SetInputName(const char *name);
-  /**
-   * These functions are required for searchable PDF output.
-   * We need our hands on the input file so that we can include
-   * it in the PDF without transcoding. If that is not possible,
-   * we need the original image. Finally, resolution metadata
-   * is stored in the PDF so we need that as well.
-   */
-  const char *GetInputName();
-  // Takes ownership of the input pix.
-  void SetInputImage(Pix *pix);
-  Pix *GetInputImage();
-  int GetSourceYResolution();
-  const char *GetDatapath();
-
-  /** Set the name of the bonus output files. Needed only for debugging. */
-  void SetOutputName(const char *name);
-
-  /**
-   * Set the value of an internal "parameter."
-   * Supply the name of the parameter and the value as a string, just as
-   * you would in a config file.
-   * Returns false if the name lookup failed.
-   * Eg SetVariable("tessedit_char_blacklist", "xyz"); to ignore x, y and z.
-   * Or SetVariable("classify_bln_numeric_mode", "1"); to set numeric-only mode.
-   * SetVariable may be used before Init, but settings will revert to
-   * defaults on End().
-   *
-   * Note: Must be called after Init(). Only works for non-init variables
-   * (init variables should be passed to Init()).
-   */
-  bool SetVariable(const char *name, const char *value);
-  bool SetDebugVariable(const char *name, const char *value);
-
-  /**
-   * Returns true if the parameter was found among Tesseract parameters.
-   * Fills in value with the value of the parameter.
-   */
-  bool GetIntVariable(const char *name, int *value) const;
-  bool GetBoolVariable(const char *name, bool *value) const;
-  bool GetDoubleVariable(const char *name, double *value) const;
-
-  /**
-   * Returns the pointer to the string that represents the value of the
-   * parameter if it was found among Tesseract parameters.
-   */
-  const char *GetStringVariable(const char *name) const;
-
-#ifndef DISABLED_LEGACY_ENGINE
-
-  /**
-   * Print Tesseract fonts table to the given file.
-   */
-  void PrintFontsTable(FILE *fp) const;
-
-#endif
-
-  /**
-   * Print Tesseract parameters to the given file.
-   */
-  void PrintVariables(FILE *fp) const;
-
-  /**
-   * Get value of named variable as a string, if it exists.
-   */
-  bool GetVariableAsString(const char *name, std::string *val) const;
-
-  /**
-   * Instances are now mostly thread-safe and totally independent,
-   * but some global parameters remain. Basically it is safe to use multiple
-   * TessBaseAPIs in different threads in parallel, UNLESS:
-   * you use SetVariable on some of the Params in classify and textord.
-   * If you do, then the effect will be to change it for all your instances.
-   *
-   * Start tesseract. Returns zero on success and -1 on failure.
-   * NOTE that the only members that may be called before Init are those
-   * listed above here in the class definition.
-   *
-   * The datapath must be the name of the tessdata directory.
-   * The language is (usually) an ISO 639-3 string or nullptr will default to
-   * eng. It is entirely safe (and eventually will be efficient too) to call
-   * Init multiple times on the same instance to change language, or just
-   * to reset the classifier.
-   * The language may be a string of the form [~]<lang>[+[~]<lang>]* indicating
-   * that multiple languages are to be loaded. Eg hin+eng will load Hindi and
-   * English. Languages may specify internally that they want to be loaded
-   * with one or more other languages, so the ~ sign is available to override
-   * that. Eg if hin were set to load eng by default, then hin+~eng would force
-   * loading only hin. The number of loaded languages is limited only by
-   * memory, with the caveat that loading additional languages will impact
-   * both speed and accuracy, as there is more work to do to decide on the
-   * applicable language, and there is more chance of hallucinating incorrect
-   * words.
-   * WARNING: On changing languages, all Tesseract parameters are reset
-   * back to their default values. (Which may vary between languages.)
-   * If you have a rare need to set a Variable that controls
-   * initialization for a second call to Init you should explicitly
-   * call End() and then use SetVariable before Init. This is only a very
-   * rare use case, since there are very few uses that require any parameters
-   * to be set before Init.
-   *
-   * If set_only_non_debug_params is true, only params that do not contain
-   * "debug" in the name will be set.
-   */
-  int Init(const char *datapath, const char *language, OcrEngineMode mode,
-           char **configs, int configs_size,
-           const std::vector<std::string> *vars_vec,
-           const std::vector<std::string> *vars_values,
-           bool set_only_non_debug_params);
-  int Init(const char *datapath, const char *language, OcrEngineMode oem) {
-    return Init(datapath, language, oem, nullptr, 0, nullptr, nullptr, false);
-  }
-  int Init(const char *datapath, const char *language) {
-    return Init(datapath, language, OEM_DEFAULT, nullptr, 0, nullptr, nullptr,
-                false);
-  }
-  // In-memory version reads the traineddata file directly from the given
-  // data[data_size] array, and/or reads data via a FileReader.
-  int Init(const char *data, int data_size, const char *language,
-           OcrEngineMode mode, char **configs, int configs_size,
-           const std::vector<std::string> *vars_vec,
-           const std::vector<std::string> *vars_values,
-           bool set_only_non_debug_params, FileReader reader);
-
-  /**
-   * Returns the languages string used in the last valid initialization.
-   * If the last initialization specified "deu+hin" then that will be
-   * returned. If hin loaded eng automatically as well, then that will
-   * not be included in this list. To find the languages actually
-   * loaded use GetLoadedLanguagesAsVector.
-   * The returned string should NOT be deleted.
-   */
-  const char *GetInitLanguagesAsString() const;
-
-  /**
-   * Returns the loaded languages in the vector of std::string.
-   * Includes all languages loaded by the last Init, including those loaded
-   * as dependencies of other loaded languages.
-   */
-  void GetLoadedLanguagesAsVector(std::vector<std::string> *langs) const;
-
-  /**
-   * Returns the available languages in the sorted vector of std::string.
-   */
-  void GetAvailableLanguagesAsVector(std::vector<std::string> *langs) const;
-
-  /**
-   * Init only for page layout analysis. Use only for calls to SetImage and
-   * AnalysePage. Calls that attempt recognition will generate an error.
-   */
-  void InitForAnalysePage();
-
-  /**
-   * Read a "config" file containing a set of param, value pairs.
-   * Searches the standard places: tessdata/configs, tessdata/tessconfigs
-   * and also accepts a relative or absolute path name.
-   * Note: only non-init params will be set (init params are set by Init()).
-   */
-  void ReadConfigFile(const char *filename);
-  /** Same as above, but only set debug params from the given config file. */
-  void ReadDebugConfigFile(const char *filename);
-
-  /**
-   * Set the current page segmentation mode. Defaults to PSM_SINGLE_BLOCK.
-   * The mode is stored as an IntParam so it can also be modified by
-   * ReadConfigFile or SetVariable("tessedit_pageseg_mode", mode as string).
-   */
-  void SetPageSegMode(PageSegMode mode);
-
-  /** Return the current page segmentation mode. */
-  PageSegMode GetPageSegMode() const;
-
-  /**
-   * Recognize a rectangle from an image and return the result as a string.
-   * May be called many times for a single Init.
-   * Currently has no error checking.
-   * Greyscale of 8 and color of 24 or 32 bits per pixel may be given.
-   * Palette color images will not work properly and must be converted to
-   * 24 bit.
-   * Binary images of 1 bit per pixel may also be given but they must be
-   * byte packed with the MSB of the first byte being the first pixel, and a
-   * 1 represents WHITE. For binary images set bytes_per_pixel=0.
-   * The recognized text is returned as a char* which is coded
-   * as UTF8 and must be freed with the delete [] operator.
-   *
-   * Note that TesseractRect is the simplified convenience interface.
-   * For advanced uses, use SetImage, (optionally) SetRectangle, Recognize,
-   * and one or more of the Get*Text functions below.
-   */
-  char *TesseractRect(const unsigned char *imagedata, int bytes_per_pixel,
-                      int bytes_per_line, int left, int top, int width,
-                      int height);
-
-  /**
-   * Call between pages or documents etc to free up memory and forget
-   * adaptive data.
-   */
-  void ClearAdaptiveClassifier();
-
-  /**
-   * @defgroup AdvancedAPI Advanced API
-   * The following methods break TesseractRect into pieces, so you can
-   * get hold of the thresholded image, get the text in different formats,
-   * get bounding boxes, confidences etc.
-   */
-  /* @{ */
-
-  /**
-   * Provide an image for Tesseract to recognize. Format is as
-   * TesseractRect above. Copies the image buffer and converts to Pix.
-   * SetImage clears all recognition results, and sets the rectangle to the
-   * full image, so it may be followed immediately by a GetUTF8Text, and it
-   * will automatically perform recognition.
-   */
-  void SetImage(const unsigned char *imagedata, int width, int height,
-                int bytes_per_pixel, int bytes_per_line);
-
-  /**
-   * Provide an image for Tesseract to recognize. As with SetImage above,
-   * Tesseract takes its own copy of the image, so it need not persist until
-   * after Recognize.
-   * Pix vs raw, which to use?
-   * Use Pix where possible. Tesseract uses Pix as its internal representation
-   * and it is therefore more efficient to provide a Pix directly.
-   */
-  void SetImage(Pix *pix);
-
-  /**
-   * Set the resolution of the source image in pixels per inch so font size
-   * information can be calculated in results.  Call this after SetImage().
-   */
-  void SetSourceResolution(int ppi);
-
-  /**
-   * Restrict recognition to a sub-rectangle of the image. Call after SetImage.
-   * Each SetRectangle clears the recognition results so multiple rectangles
-   * can be recognized with the same image.
-   */
-  void SetRectangle(int left, int top, int width, int height);
-
-  /**
-   * Get a copy of the internal thresholded image from Tesseract.
-   * Caller takes ownership of the Pix and must pixDestroy it.
-   * May be called any time after SetImage, or after TesseractRect.
-   */
-  Pix *GetThresholdedImage();
-
-  /**
-   * Get the result of page layout analysis as a leptonica-style
-   * Boxa, Pixa pair, in reading order.
-   * Can be called before or after Recognize.
-   */
-  Boxa *GetRegions(Pixa **pixa);
-
-  /**
-   * Get the textlines as a leptonica-style
-   * Boxa, Pixa pair, in reading order.
-   * Can be called before or after Recognize.
-   * If raw_image is true, then extract from the original image instead of the
-   * thresholded image and pad by raw_padding pixels.
-   * If blockids is not nullptr, the block-id of each line is also returned as
-   * an array of one element per line. delete [] after use. If paraids is not
-   * nullptr, the paragraph-id of each line within its block is also returned as
-   * an array of one element per line. delete [] after use.
-   */
-  Boxa *GetTextlines(bool raw_image, int raw_padding, Pixa **pixa,
-                     int **blockids, int **paraids);
-  /*
-   Helper method to extract from the thresholded image. (most common usage)
-*/
-  Boxa *GetTextlines(Pixa **pixa, int **blockids) {
-    return GetTextlines(false, 0, pixa, blockids, nullptr);
-  }
-
-  /**
-   * Get textlines and strips of image regions as a leptonica-style Boxa, Pixa
-   * pair, in reading order. Enables downstream handling of non-rectangular
-   * regions.
-   * Can be called before or after Recognize.
-   * If blockids is not nullptr, the block-id of each line is also returned as
-   * an array of one element per line. delete [] after use.
-   */
-  Boxa *GetStrips(Pixa **pixa, int **blockids);
-
-  /**
-   * Get the words as a leptonica-style
-   * Boxa, Pixa pair, in reading order.
-   * Can be called before or after Recognize.
-   */
-  Boxa *GetWords(Pixa **pixa);
-
-  /**
-   * Gets the individual connected (text) components (created
-   * after pages segmentation step, but before recognition)
-   * as a leptonica-style Boxa, Pixa pair, in reading order.
-   * Can be called before or after Recognize.
-   * Note: the caller is responsible for calling boxaDestroy()
-   * on the returned Boxa array and pixaDestroy() on cc array.
-   */
-  Boxa *GetConnectedComponents(Pixa **cc);
-
-  /**
-   * Get the given level kind of components (block, textline, word etc.) as a
-   * leptonica-style Boxa, Pixa pair, in reading order.
-   * Can be called before or after Recognize.
-   * If blockids is not nullptr, the block-id of each component is also returned
-   * as an array of one element per component. delete [] after use.
-   * If paraids is not nullptr, the paragraph-id of each component within its
-   * block is also returned as an array of one element per component. delete []
-   * after use. If raw_image is true, then portions of the original image are
-   * extracted instead of the thresholded image and padded with raw_padding. If
-   * text_only is true, then only text components are returned.
-   */
-  Boxa *GetComponentImages(PageIteratorLevel level, bool text_only,
-                           bool raw_image, int raw_padding, Pixa **pixa,
-                           int **blockids, int **paraids);
-  // Helper function to get binary images with no padding (most common usage).
-  Boxa *GetComponentImages(const PageIteratorLevel level, const bool text_only,
-                           Pixa **pixa, int **blockids) {
-    return GetComponentImages(level, text_only, false, 0, pixa, blockids,
-                              nullptr);
-  }
-
-  /**
-   * Returns the scale factor of the thresholded image that would be returned by
-   * GetThresholdedImage() and the various GetX() methods that call
-   * GetComponentImages().
-   * Returns 0 if no thresholder has been set.
-   */
-  int GetThresholdedImageScaleFactor() const;
-
-  /**
-   * Runs page layout analysis in the mode set by SetPageSegMode.
-   * May optionally be called prior to Recognize to get access to just
-   * the page layout results. Returns an iterator to the results.
-   * If merge_similar_words is true, words are combined where suitable for use
-   * with a line recognizer. Use if you want to use AnalyseLayout to find the
-   * textlines, and then want to process textline fragments with an external
-   * line recognizer.
-   * Returns nullptr on error or an empty page.
-   * The returned iterator must be deleted after use.
-   * WARNING! This class points to data held within the TessBaseAPI class, and
-   * therefore can only be used while the TessBaseAPI class still exists and
-   * has not been subjected to a call of Init, SetImage, Recognize, Clear, End,
-   * DetectOS, or anything else that changes the internal PAGE_RES.
-   */
-  PageIterator *AnalyseLayout();
-  PageIterator *AnalyseLayout(bool merge_similar_words);
-
-  /**
-   * Recognize the image from SetAndThresholdImage, generating Tesseract
-   * internal structures. Returns 0 on success.
-   * Optional. The Get*Text functions below will call Recognize if needed.
-   * After Recognize, the output is kept internally until the next SetImage.
-   */
-  int Recognize(ETEXT_DESC *monitor);
-
-  /**
-   * Methods to retrieve information after SetAndThresholdImage(),
-   * Recognize() or TesseractRect(). (Recognize is called implicitly if needed.)
-   */
-
-  /**
-   * Turns images into symbolic text.
-   *
-   * filename can point to a single image, a multi-page TIFF,
-   * or a plain text list of image filenames.
-   *
-   * retry_config is useful for debugging. If not nullptr, you can fall
-   * back to an alternate configuration if a page fails for some
-   * reason.
-   *
-   * timeout_millisec terminates processing if any single page
-   * takes too long. Set to 0 for unlimited time.
-   *
-   * renderer is responsible for creating the output. For example,
-   * use the TessTextRenderer if you want plaintext output, or
-   * the TessPDFRenderer to produce searchable PDF.
-   *
-   * If tessedit_page_number is non-negative, will only process that
-   * single page. Works for multi-page tiff file, or filelist.
-   *
-   * Returns true if successful, false on error.
-   */
-  bool ProcessPages(const char *filename, const char *retry_config,
-                    int timeout_millisec, TessResultRenderer *renderer);
-  // Does the real work of ProcessPages.
-  bool ProcessPagesInternal(const char *filename, const char *retry_config,
-                            int timeout_millisec, TessResultRenderer *renderer);
-
-  /**
-   * Turn a single image into symbolic text.
-   *
-   * The pix is the image processed. filename and page_index are
-   * metadata used by side-effect processes, such as reading a box
-   * file or formatting as hOCR.
-   *
-   * See ProcessPages for descriptions of other parameters.
-   */
-  bool ProcessPage(Pix *pix, int page_index, const char *filename,
-                   const char *retry_config, int timeout_millisec,
-                   TessResultRenderer *renderer);
-
-  /**
-   * Get a reading-order iterator to the results of LayoutAnalysis and/or
-   * Recognize. The returned iterator must be deleted after use.
-   * WARNING! This class points to data held within the TessBaseAPI class, and
-   * therefore can only be used while the TessBaseAPI class still exists and
-   * has not been subjected to a call of Init, SetImage, Recognize, Clear, End,
-   * DetectOS, or anything else that changes the internal PAGE_RES.
-   */
-  ResultIterator *GetIterator();
-
-  /**
-   * Get a mutable iterator to the results of LayoutAnalysis and/or Recognize.
-   * The returned iterator must be deleted after use.
-   * WARNING! This class points to data held within the TessBaseAPI class, and
-   * therefore can only be used while the TessBaseAPI class still exists and
-   * has not been subjected to a call of Init, SetImage, Recognize, Clear, End,
-   * DetectOS, or anything else that changes the internal PAGE_RES.
-   */
-  MutableIterator *GetMutableIterator();
-
-  /**
-   * The recognized text is returned as a char* which is coded
-   * as UTF8 and must be freed with the delete [] operator.
-   */
-  char *GetUTF8Text();
-
-  /**
-   * Make a HTML-formatted string with hOCR markup from the internal
-   * data structures.
-   * page_number is 0-based but will appear in the output as 1-based.
-   * monitor can be used to
-   *  cancel the recognition
-   *  receive progress callbacks
-   * Returned string must be freed with the delete [] operator.
-   */
-  char *GetHOCRText(ETEXT_DESC *monitor, int page_number);
-
-  /**
-   * Make a HTML-formatted string with hOCR markup from the internal
-   * data structures.
-   * page_number is 0-based but will appear in the output as 1-based.
-   * Returned string must be freed with the delete [] operator.
-   */
-  char *GetHOCRText(int page_number);
-
-  /**
-   * Make an XML-formatted string with Alto markup from the internal
-   * data structures.
-   */
-  char *GetAltoText(ETEXT_DESC *monitor, int page_number);
-
-  /**
-   * Make an XML-formatted string with Alto markup from the internal
-   * data structures.
-   */
-  char *GetAltoText(int page_number);
-
-  /**
-   * Make a TSV-formatted string from the internal data structures.
-   * page_number is 0-based but will appear in the output as 1-based.
-   * Returned string must be freed with the delete [] operator.
-   */
-  char *GetTSVText(int page_number);
-
-  /**
-   * Make a box file for LSTM training from the internal data structures.
-   * Constructs coordinates in the original image - not just the rectangle.
-   * page_number is a 0-based page index that will appear in the box file.
-   * Returned string must be freed with the delete [] operator.
-   */
-  char *GetLSTMBoxText(int page_number);
-
-  /**
-   * The recognized text is returned as a char* which is coded in the same
-   * format as a box file used in training.
-   * Constructs coordinates in the original image - not just the rectangle.
-   * page_number is a 0-based page index that will appear in the box file.
-   * Returned string must be freed with the delete [] operator.
-   */
-  char *GetBoxText(int page_number);
-
-  /**
-   * The recognized text is returned as a char* which is coded in the same
-   * format as a WordStr box file used in training.
-   * page_number is a 0-based page index that will appear in the box file.
-   * Returned string must be freed with the delete [] operator.
-   */
-  char *GetWordStrBoxText(int page_number);
-
-  /**
-   * The recognized text is returned as a char* which is coded
-   * as UNLV format Latin-1 with specific reject and suspect codes.
-   * Returned string must be freed with the delete [] operator.
-   */
-  char *GetUNLVText();
-
-  /**
-   * Detect the orientation of the input image and apparent script (alphabet).
-   * orient_deg is the detected clockwise rotation of the input image in degrees
-   * (0, 90, 180, 270)
-   * orient_conf is the confidence (15.0 is reasonably confident)
-   * script_name is an ASCII string, the name of the script, e.g. "Latin"
-   * script_conf is confidence level in the script
-   * Returns true on success and writes values to each parameter as an output
-   */
-  bool DetectOrientationScript(int *orient_deg, float *orient_conf,
-                               const char **script_name, float *script_conf);
-
-  /**
-   * The recognized text is returned as a char* which is coded
-   * as UTF8 and must be freed with the delete [] operator.
-   * page_number is a 0-based page index that will appear in the osd file.
-   */
-  char *GetOsdText(int page_number);
-
-  /** Returns the (average) confidence value between 0 and 100. */
-  int MeanTextConf();
-  /**
-   * Returns all word confidences (between 0 and 100) in an array, terminated
-   * by -1.  The calling function must delete [] after use.
-   * The number of confidences should correspond to the number of space-
-   * delimited words in GetUTF8Text.
-   */
-  int *AllWordConfidences();
-
-#ifndef DISABLED_LEGACY_ENGINE
-  /**
-   * Applies the given word to the adaptive classifier if possible.
-   * The word must be SPACE-DELIMITED UTF-8 - l i k e t h i s , so it can
-   * tell the boundaries of the graphemes.
-   * Assumes that SetImage/SetRectangle have been used to set the image
-   * to the given word. The mode arg should be PSM_SINGLE_WORD or
-   * PSM_CIRCLE_WORD, as that will be used to control layout analysis.
-   * The currently set PageSegMode is preserved.
-   * Returns false if adaption was not possible for some reason.
-   */
-  bool AdaptToWordStr(PageSegMode mode, const char *wordstr);
-#endif //  ndef DISABLED_LEGACY_ENGINE
-
-  /**
-   * Free up recognition results and any stored image data, without actually
-   * freeing any recognition data that would be time-consuming to reload.
-   * Afterwards, you must call SetImage or TesseractRect before doing
-   * any Recognize or Get* operation.
-   */
-  void Clear();
-
-  /**
-   * Close down tesseract and free up all memory. End() is equivalent to
-   * destructing and reconstructing your TessBaseAPI.
-   * Once End() has been used, none of the other API functions may be used
-   * other than Init and anything declared above it in the class definition.
-   */
-  void End();
-
-  /**
-   * Clear any library-level memory caches.
-   * There are a variety of expensive-to-load constant data structures (mostly
-   * language dictionaries) that are cached globally -- surviving the Init()
-   * and End() of individual TessBaseAPI's.  This function allows the clearing
-   * of these caches.
-   **/
-  static void ClearPersistentCache();
-
-  /**
-   * Check whether a word is valid according to Tesseract's language model
-   * @return 0 if the word is invalid, non-zero if valid.
-   * @warning temporary! This function will be removed from here and placed
-   * in a separate API at some future time.
-   */
-  int IsValidWord(const char *word) const;
-  // Returns true if utf8_character is defined in the UniCharset.
-  bool IsValidCharacter(const char *utf8_character) const;
-
-  bool GetTextDirection(int *out_offset, float *out_slope);
-
-  /** Sets Dict::letter_is_okay_ function to point to the given function. */
-  void SetDictFunc(DictFunc f);
-
-  /** Sets Dict::probability_in_context_ function to point to the given
-   * function.
-   */
-  void SetProbabilityInContextFunc(ProbabilityInContextFunc f);
-
-  /**
-   * Estimates the Orientation And Script of the image.
-   * @return true if the image was processed successfully.
-   */
-  bool DetectOS(OSResults *);
-
-  /**
-   * Return text orientation of each block as determined by an earlier run
-   * of layout analysis.
-   */
-  void GetBlockTextOrientations(int **block_orientation,
-                                bool **vertical_writing);
-
-  /** This method returns the string form of the specified unichar. */
-  const char *GetUnichar(int unichar_id) const;
-
-  /** Return the pointer to the i-th dawg loaded into tesseract_ object. */
-  const Dawg *GetDawg(int i) const;
-
-  /** Return the number of dawgs loaded into tesseract_ object. */
-  int NumDawgs() const;
-
-  Tesseract *tesseract() const {
-    return tesseract_;
-  }
-
-  OcrEngineMode oem() const {
-    return last_oem_requested_;
-  }
-
-  void set_min_orientation_margin(double margin);
-  /* @} */
-
-protected:
-  /** Common code for setting the image. Returns true if Init has been called.
-   */
-  bool InternalSetImage();
-
-  /**
-   * Run the thresholder to make the thresholded image. If pix is not nullptr,
-   * the source is thresholded to pix instead of the internal IMAGE.
-   */
-  virtual bool Threshold(Pix **pix);
-
-  /**
-   * Find lines from the image making the BLOCK_LIST.
-   * @return 0 on success.
-   */
-  int FindLines();
-
-  /** Delete the pageres and block list ready for a new page. */
-  void ClearResults();
-
-  /**
-   * Return an LTR Result Iterator -- used only for training, as we really want
-   * to ignore all BiDi smarts at that point.
-   * delete once you're done with it.
-   */
-  LTRResultIterator *GetLTRIterator();
-
-  /**
-   * Return the length of the output text string, as UTF8, assuming
-   * one newline per line and one per block, with a terminator,
-   * and assuming a single character reject marker for each rejected character.
-   * Also return the number of recognized blobs in blob_count.
-   */
-  int TextLength(int *blob_count) const;
-
-  //// paragraphs.cpp ////////////////////////////////////////////////////
-  void DetectParagraphs(bool after_text_recognition);
-
-  const PAGE_RES *GetPageRes() const {
-    return page_res_;
-  }
-
-protected:
-  Tesseract *tesseract_;          ///< The underlying data object.
-  Tesseract *osd_tesseract_;      ///< For orientation & script detection.
-  EquationDetect *equ_detect_;    ///< The equation detector.
-  FileReader reader_;             ///< Reads files from any filesystem.
-  ImageThresholder *thresholder_; ///< Image thresholding module.
-  std::vector<ParagraphModel *> *paragraph_models_;
-  BLOCK_LIST *block_list_;           ///< The page layout.
-  PAGE_RES *page_res_;               ///< The page-level data.
-  std::string input_file_;           ///< Name used by training code.
-  std::string output_file_;          ///< Name used by debug code.
-  std::string datapath_;             ///< Current location of tessdata.
-  std::string language_;             ///< Last initialized language.
-  OcrEngineMode last_oem_requested_; ///< Last ocr language mode requested.
-  bool recognition_done_;            ///< page_res_ contains recognition data.
-
-  /**
-   * @defgroup ThresholderParams Thresholder Parameters
-   * Parameters saved from the Thresholder. Needed to rebuild coordinates.
-   */
-  /* @{ */
-  int rect_left_;
-  int rect_top_;
-  int rect_width_;
-  int rect_height_;
-  int image_width_;
-  int image_height_;
-  /* @} */
-
-private:
-  // A list of image filenames gets special consideration
-  bool ProcessPagesFileList(FILE *fp, std::string *buf,
-                            const char *retry_config, int timeout_millisec,
-                            TessResultRenderer *renderer,
-                            int tessedit_page_number);
-  // TIFF supports multipage so gets special consideration.
-  bool ProcessPagesMultipageTiff(const unsigned char *data, size_t size,
-                                 const char *filename, const char *retry_config,
-                                 int timeout_millisec,
-                                 TessResultRenderer *renderer,
-                                 int tessedit_page_number);
-}; // class TessBaseAPI.
-
-/** Escape a char string - remove &<>"' with HTML codes. */
-std::string HOcrEscape(const char *text);
-
-} // namespace tesseract
-
-#endif // TESSERACT_API_BASEAPI_H_
--- a/third_party/ocr/tesseract-ocr/kylin/mips64/include/tesseract/capi.h
+++ b/third_party/ocr/tesseract-ocr/kylin/mips64/include/tesseract/capi.h
@ -1,484 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// File:        capi.h
-// Description: C-API TessBaseAPI
-//
-// (C) Copyright 2012, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef API_CAPI_H_
-#define API_CAPI_H_
-
-#include "export.h"
-
-#ifdef __cplusplus
-#  include <tesseract/baseapi.h>
-#  include <tesseract/ocrclass.h>
-#  include <tesseract/pageiterator.h>
-#  include <tesseract/renderer.h>
-#  include <tesseract/resultiterator.h>
-#endif
-
-#include <stdbool.h>
-#include <stdio.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#ifndef BOOL
-#  define BOOL int
-#  define TRUE 1
-#  define FALSE 0
-#endif
-
-#ifdef __cplusplus
-typedef tesseract::TessResultRenderer TessResultRenderer;
-typedef tesseract::TessBaseAPI TessBaseAPI;
-typedef tesseract::PageIterator TessPageIterator;
-typedef tesseract::ResultIterator TessResultIterator;
-typedef tesseract::MutableIterator TessMutableIterator;
-typedef tesseract::ChoiceIterator TessChoiceIterator;
-typedef tesseract::OcrEngineMode TessOcrEngineMode;
-typedef tesseract::PageSegMode TessPageSegMode;
-typedef tesseract::PageIteratorLevel TessPageIteratorLevel;
-typedef tesseract::Orientation TessOrientation;
-typedef tesseract::ParagraphJustification TessParagraphJustification;
-typedef tesseract::WritingDirection TessWritingDirection;
-typedef tesseract::TextlineOrder TessTextlineOrder;
-typedef tesseract::PolyBlockType TessPolyBlockType;
-typedef tesseract::ETEXT_DESC ETEXT_DESC;
-#else
-typedef struct TessResultRenderer TessResultRenderer;
-typedef struct TessBaseAPI TessBaseAPI;
-typedef struct TessPageIterator TessPageIterator;
-typedef struct TessResultIterator TessResultIterator;
-typedef struct TessMutableIterator TessMutableIterator;
-typedef struct TessChoiceIterator TessChoiceIterator;
-typedef enum TessOcrEngineMode {
-  OEM_TESSERACT_ONLY,
-  OEM_LSTM_ONLY,
-  OEM_TESSERACT_LSTM_COMBINED,
-  OEM_DEFAULT
-} TessOcrEngineMode;
-typedef enum TessPageSegMode {
-  PSM_OSD_ONLY,
-  PSM_AUTO_OSD,
-  PSM_AUTO_ONLY,
-  PSM_AUTO,
-  PSM_SINGLE_COLUMN,
-  PSM_SINGLE_BLOCK_VERT_TEXT,
-  PSM_SINGLE_BLOCK,
-  PSM_SINGLE_LINE,
-  PSM_SINGLE_WORD,
-  PSM_CIRCLE_WORD,
-  PSM_SINGLE_CHAR,
-  PSM_SPARSE_TEXT,
-  PSM_SPARSE_TEXT_OSD,
-  PSM_RAW_LINE,
-  PSM_COUNT
-} TessPageSegMode;
-typedef enum TessPageIteratorLevel {
-  RIL_BLOCK,
-  RIL_PARA,
-  RIL_TEXTLINE,
-  RIL_WORD,
-  RIL_SYMBOL
-} TessPageIteratorLevel;
-typedef enum TessPolyBlockType {
-  PT_UNKNOWN,
-  PT_FLOWING_TEXT,
-  PT_HEADING_TEXT,
-  PT_PULLOUT_TEXT,
-  PT_EQUATION,
-  PT_INLINE_EQUATION,
-  PT_TABLE,
-  PT_VERTICAL_TEXT,
-  PT_CAPTION_TEXT,
-  PT_FLOWING_IMAGE,
-  PT_HEADING_IMAGE,
-  PT_PULLOUT_IMAGE,
-  PT_HORZ_LINE,
-  PT_VERT_LINE,
-  PT_NOISE,
-  PT_COUNT
-} TessPolyBlockType;
-typedef enum TessOrientation {
-  ORIENTATION_PAGE_UP,
-  ORIENTATION_PAGE_RIGHT,
-  ORIENTATION_PAGE_DOWN,
-  ORIENTATION_PAGE_LEFT
-} TessOrientation;
-typedef enum TessParagraphJustification {
-  JUSTIFICATION_UNKNOWN,
-  JUSTIFICATION_LEFT,
-  JUSTIFICATION_CENTER,
-  JUSTIFICATION_RIGHT
-} TessParagraphJustification;
-typedef enum TessWritingDirection {
-  WRITING_DIRECTION_LEFT_TO_RIGHT,
-  WRITING_DIRECTION_RIGHT_TO_LEFT,
-  WRITING_DIRECTION_TOP_TO_BOTTOM
-} TessWritingDirection;
-typedef enum TessTextlineOrder {
-  TEXTLINE_ORDER_LEFT_TO_RIGHT,
-  TEXTLINE_ORDER_RIGHT_TO_LEFT,
-  TEXTLINE_ORDER_TOP_TO_BOTTOM
-} TessTextlineOrder;
-typedef struct ETEXT_DESC ETEXT_DESC;
-#endif
-
-typedef bool (*TessCancelFunc)(void *cancel_this, int words);
-typedef bool (*TessProgressFunc)(ETEXT_DESC *ths, int left, int right, int top,
-                                 int bottom);
-
-struct Pix;
-struct Boxa;
-struct Pixa;
-
-/* General free functions */
-
-TESS_API const char *TessVersion();
-TESS_API void TessDeleteText(const char *text);
-TESS_API void TessDeleteTextArray(char **arr);
-TESS_API void TessDeleteIntArray(const int *arr);
-
-/* Renderer API */
-TESS_API TessResultRenderer *TessTextRendererCreate(const char *outputbase);
-TESS_API TessResultRenderer *TessHOcrRendererCreate(const char *outputbase);
-TESS_API TessResultRenderer *TessHOcrRendererCreate2(const char *outputbase,
-                                                     BOOL font_info);
-TESS_API TessResultRenderer *TessAltoRendererCreate(const char *outputbase);
-TESS_API TessResultRenderer *TessTsvRendererCreate(const char *outputbase);
-TESS_API TessResultRenderer *TessPDFRendererCreate(const char *outputbase,
-                                                   const char *datadir,
-                                                   BOOL textonly);
-TESS_API TessResultRenderer *TessUnlvRendererCreate(const char *outputbase);
-TESS_API TessResultRenderer *TessBoxTextRendererCreate(const char *outputbase);
-TESS_API TessResultRenderer *TessLSTMBoxRendererCreate(const char *outputbase);
-TESS_API TessResultRenderer *TessWordStrBoxRendererCreate(
-    const char *outputbase);
-
-TESS_API void TessDeleteResultRenderer(TessResultRenderer *renderer);
-TESS_API void TessResultRendererInsert(TessResultRenderer *renderer,
-                                       TessResultRenderer *next);
-TESS_API TessResultRenderer *TessResultRendererNext(
-    TessResultRenderer *renderer);
-TESS_API BOOL TessResultRendererBeginDocument(TessResultRenderer *renderer,
-                                              const char *title);
-TESS_API BOOL TessResultRendererAddImage(TessResultRenderer *renderer,
-                                         TessBaseAPI *api);
-TESS_API BOOL TessResultRendererEndDocument(TessResultRenderer *renderer);
-
-TESS_API const char *TessResultRendererExtention(TessResultRenderer *renderer);
-TESS_API const char *TessResultRendererTitle(TessResultRenderer *renderer);
-TESS_API int TessResultRendererImageNum(TessResultRenderer *renderer);
-
-/* Base API */
-
-TESS_API TessBaseAPI *TessBaseAPICreate();
-TESS_API void TessBaseAPIDelete(TessBaseAPI *handle);
-
-TESS_API size_t TessBaseAPIGetOpenCLDevice(TessBaseAPI *handle, void **device);
-
-TESS_API void TessBaseAPISetInputName(TessBaseAPI *handle, const char *name);
-TESS_API const char *TessBaseAPIGetInputName(TessBaseAPI *handle);
-
-TESS_API void TessBaseAPISetInputImage(TessBaseAPI *handle, struct Pix *pix);
-TESS_API struct Pix *TessBaseAPIGetInputImage(TessBaseAPI *handle);
-
-TESS_API int TessBaseAPIGetSourceYResolution(TessBaseAPI *handle);
-TESS_API const char *TessBaseAPIGetDatapath(TessBaseAPI *handle);
-
-TESS_API void TessBaseAPISetOutputName(TessBaseAPI *handle, const char *name);
-
-TESS_API BOOL TessBaseAPISetVariable(TessBaseAPI *handle, const char *name,
-                                     const char *value);
-TESS_API BOOL TessBaseAPISetDebugVariable(TessBaseAPI *handle, const char *name,
-                                          const char *value);
-
-TESS_API BOOL TessBaseAPIGetIntVariable(const TessBaseAPI *handle,
-                                        const char *name, int *value);
-TESS_API BOOL TessBaseAPIGetBoolVariable(const TessBaseAPI *handle,
-                                         const char *name, BOOL *value);
-TESS_API BOOL TessBaseAPIGetDoubleVariable(const TessBaseAPI *handle,
-                                           const char *name, double *value);
-TESS_API const char *TessBaseAPIGetStringVariable(const TessBaseAPI *handle,
-                                                  const char *name);
-
-TESS_API void TessBaseAPIPrintVariables(const TessBaseAPI *handle, FILE *fp);
-TESS_API BOOL TessBaseAPIPrintVariablesToFile(const TessBaseAPI *handle,
-                                              const char *filename);
-
-TESS_API int TessBaseAPIInit1(TessBaseAPI *handle, const char *datapath,
-                              const char *language, TessOcrEngineMode oem,
-                              char **configs, int configs_size);
-TESS_API int TessBaseAPIInit2(TessBaseAPI *handle, const char *datapath,
-                              const char *language, TessOcrEngineMode oem);
-TESS_API int TessBaseAPIInit3(TessBaseAPI *handle, const char *datapath,
-                              const char *language);
-
-TESS_API int TessBaseAPIInit4(TessBaseAPI *handle, const char *datapath,
-                              const char *language, TessOcrEngineMode mode,
-                              char **configs, int configs_size, char **vars_vec,
-                              char **vars_values, size_t vars_vec_size,
-                              BOOL set_only_non_debug_params);
-
-TESS_API int TessBaseAPIInit5(TessBaseAPI *handle, const char *data, int data_size,
-                              const char *language, TessOcrEngineMode mode,
-                              char **configs, int configs_size, char **vars_vec,
-                              char **vars_values, size_t vars_vec_size,
-                              BOOL set_only_non_debug_params);
-
-TESS_API const char *TessBaseAPIGetInitLanguagesAsString(
-    const TessBaseAPI *handle);
-TESS_API char **TessBaseAPIGetLoadedLanguagesAsVector(
-    const TessBaseAPI *handle);
-TESS_API char **TessBaseAPIGetAvailableLanguagesAsVector(
-    const TessBaseAPI *handle);
-
-TESS_API void TessBaseAPIInitForAnalysePage(TessBaseAPI *handle);
-
-TESS_API void TessBaseAPIReadConfigFile(TessBaseAPI *handle,
-                                        const char *filename);
-TESS_API void TessBaseAPIReadDebugConfigFile(TessBaseAPI *handle,
-                                             const char *filename);
-
-TESS_API void TessBaseAPISetPageSegMode(TessBaseAPI *handle,
-                                        TessPageSegMode mode);
-TESS_API TessPageSegMode TessBaseAPIGetPageSegMode(const TessBaseAPI *handle);
-
-TESS_API char *TessBaseAPIRect(TessBaseAPI *handle,
-                               const unsigned char *imagedata,
-                               int bytes_per_pixel, int bytes_per_line,
-                               int left, int top, int width, int height);
-
-TESS_API void TessBaseAPIClearAdaptiveClassifier(TessBaseAPI *handle);
-
-TESS_API void TessBaseAPISetImage(TessBaseAPI *handle,
-                                  const unsigned char *imagedata, int width,
-                                  int height, int bytes_per_pixel,
-                                  int bytes_per_line);
-TESS_API void TessBaseAPISetImage2(TessBaseAPI *handle, struct Pix *pix);
-
-TESS_API void TessBaseAPISetSourceResolution(TessBaseAPI *handle, int ppi);
-
-TESS_API void TessBaseAPISetRectangle(TessBaseAPI *handle, int left, int top,
-                                      int width, int height);
-
-TESS_API struct Pix *TessBaseAPIGetThresholdedImage(TessBaseAPI *handle);
-TESS_API struct Boxa *TessBaseAPIGetRegions(TessBaseAPI *handle,
-                                            struct Pixa **pixa);
-TESS_API struct Boxa *TessBaseAPIGetTextlines(TessBaseAPI *handle,
-                                              struct Pixa **pixa,
-                                              int **blockids);
-TESS_API struct Boxa *TessBaseAPIGetTextlines1(TessBaseAPI *handle,
-                                               BOOL raw_image, int raw_padding,
-                                               struct Pixa **pixa,
-                                               int **blockids, int **paraids);
-TESS_API struct Boxa *TessBaseAPIGetStrips(TessBaseAPI *handle,
-                                           struct Pixa **pixa, int **blockids);
-TESS_API struct Boxa *TessBaseAPIGetWords(TessBaseAPI *handle,
-                                          struct Pixa **pixa);
-TESS_API struct Boxa *TessBaseAPIGetConnectedComponents(TessBaseAPI *handle,
-                                                        struct Pixa **cc);
-TESS_API struct Boxa *TessBaseAPIGetComponentImages(TessBaseAPI *handle,
-                                                    TessPageIteratorLevel level,
-                                                    BOOL text_only,
-                                                    struct Pixa **pixa,
-                                                    int **blockids);
-TESS_API struct Boxa *TessBaseAPIGetComponentImages1(
-    TessBaseAPI *handle, TessPageIteratorLevel level, BOOL text_only,
-    BOOL raw_image, int raw_padding, struct Pixa **pixa, int **blockids,
-    int **paraids);
-
-TESS_API int TessBaseAPIGetThresholdedImageScaleFactor(
-    const TessBaseAPI *handle);
-
-TESS_API TessPageIterator *TessBaseAPIAnalyseLayout(TessBaseAPI *handle);
-
-TESS_API int TessBaseAPIRecognize(TessBaseAPI *handle, ETEXT_DESC *monitor);
-
-TESS_API BOOL TessBaseAPIProcessPages(TessBaseAPI *handle, const char *filename,
-                                      const char *retry_config,
-                                      int timeout_millisec,
-                                      TessResultRenderer *renderer);
-TESS_API BOOL TessBaseAPIProcessPage(TessBaseAPI *handle, struct Pix *pix,
-                                     int page_index, const char *filename,
-                                     const char *retry_config,
-                                     int timeout_millisec,
-                                     TessResultRenderer *renderer);
-
-TESS_API TessResultIterator *TessBaseAPIGetIterator(TessBaseAPI *handle);
-TESS_API TessMutableIterator *TessBaseAPIGetMutableIterator(
-    TessBaseAPI *handle);
-
-TESS_API char *TessBaseAPIGetUTF8Text(TessBaseAPI *handle);
-TESS_API char *TessBaseAPIGetHOCRText(TessBaseAPI *handle, int page_number);
-
-TESS_API char *TessBaseAPIGetAltoText(TessBaseAPI *handle, int page_number);
-TESS_API char *TessBaseAPIGetTsvText(TessBaseAPI *handle, int page_number);
-
-TESS_API char *TessBaseAPIGetBoxText(TessBaseAPI *handle, int page_number);
-TESS_API char *TessBaseAPIGetLSTMBoxText(TessBaseAPI *handle, int page_number);
-TESS_API char *TessBaseAPIGetWordStrBoxText(TessBaseAPI *handle,
-                                            int page_number);
-
-TESS_API char *TessBaseAPIGetUNLVText(TessBaseAPI *handle);
-TESS_API int TessBaseAPIMeanTextConf(TessBaseAPI *handle);
-
-TESS_API int *TessBaseAPIAllWordConfidences(TessBaseAPI *handle);
-
-#ifndef DISABLED_LEGACY_ENGINE
-TESS_API BOOL TessBaseAPIAdaptToWordStr(TessBaseAPI *handle,
-                                        TessPageSegMode mode,
-                                        const char *wordstr);
-#endif // #ifndef DISABLED_LEGACY_ENGINE
-
-TESS_API void TessBaseAPIClear(TessBaseAPI *handle);
-TESS_API void TessBaseAPIEnd(TessBaseAPI *handle);
-
-TESS_API int TessBaseAPIIsValidWord(TessBaseAPI *handle, const char *word);
-TESS_API BOOL TessBaseAPIGetTextDirection(TessBaseAPI *handle, int *out_offset,
-                                          float *out_slope);
-
-TESS_API const char *TessBaseAPIGetUnichar(TessBaseAPI *handle, int unichar_id);
-
-TESS_API void TessBaseAPIClearPersistentCache(TessBaseAPI *handle);
-
-#ifndef DISABLED_LEGACY_ENGINE
-
-// Call TessDeleteText(*best_script_name) to free memory allocated by this
-// function
-TESS_API BOOL TessBaseAPIDetectOrientationScript(TessBaseAPI *handle,
-                                                 int *orient_deg,
-                                                 float *orient_conf,
-                                                 const char **script_name,
-                                                 float *script_conf);
-#endif // #ifndef DISABLED_LEGACY_ENGINE
-
-TESS_API void TessBaseAPISetMinOrientationMargin(TessBaseAPI *handle,
-                                                 double margin);
-
-TESS_API int TessBaseAPINumDawgs(const TessBaseAPI *handle);
-
-TESS_API TessOcrEngineMode TessBaseAPIOem(const TessBaseAPI *handle);
-
-TESS_API void TessBaseGetBlockTextOrientations(TessBaseAPI *handle,
-                                               int **block_orientation,
-                                               bool **vertical_writing);
-
-/* Page iterator */
-
-TESS_API void TessPageIteratorDelete(TessPageIterator *handle);
-
-TESS_API TessPageIterator *TessPageIteratorCopy(const TessPageIterator *handle);
-
-TESS_API void TessPageIteratorBegin(TessPageIterator *handle);
-
-TESS_API BOOL TessPageIteratorNext(TessPageIterator *handle,
-                                   TessPageIteratorLevel level);
-
-TESS_API BOOL TessPageIteratorIsAtBeginningOf(const TessPageIterator *handle,
-                                              TessPageIteratorLevel level);
-
-TESS_API BOOL TessPageIteratorIsAtFinalElement(const TessPageIterator *handle,
-                                               TessPageIteratorLevel level,
-                                               TessPageIteratorLevel element);
-
-TESS_API BOOL TessPageIteratorBoundingBox(const TessPageIterator *handle,
-                                          TessPageIteratorLevel level,
-                                          int *left, int *top, int *right,
-                                          int *bottom);
-
-TESS_API TessPolyBlockType
-TessPageIteratorBlockType(const TessPageIterator *handle);
-
-TESS_API struct Pix *TessPageIteratorGetBinaryImage(
-    const TessPageIterator *handle, TessPageIteratorLevel level);
-
-TESS_API struct Pix *TessPageIteratorGetImage(const TessPageIterator *handle,
-                                              TessPageIteratorLevel level,
-                                              int padding,
-                                              struct Pix *original_image,
-                                              int *left, int *top);
-
-TESS_API BOOL TessPageIteratorBaseline(const TessPageIterator *handle,
-                                       TessPageIteratorLevel level, int *x1,
-                                       int *y1, int *x2, int *y2);
-
-TESS_API void TessPageIteratorOrientation(
-    TessPageIterator *handle, TessOrientation *orientation,
-    TessWritingDirection *writing_direction, TessTextlineOrder *textline_order,
-    float *deskew_angle);
-
-TESS_API void TessPageIteratorParagraphInfo(
-    TessPageIterator *handle, TessParagraphJustification *justification,
-    BOOL *is_list_item, BOOL *is_crown, int *first_line_indent);
-
-/* Result iterator */
-
-TESS_API void TessResultIteratorDelete(TessResultIterator *handle);
-TESS_API TessResultIterator *TessResultIteratorCopy(
-    const TessResultIterator *handle);
-TESS_API TessPageIterator *TessResultIteratorGetPageIterator(
-    TessResultIterator *handle);
-TESS_API const TessPageIterator *TessResultIteratorGetPageIteratorConst(
-    const TessResultIterator *handle);
-TESS_API TessChoiceIterator *TessResultIteratorGetChoiceIterator(
-    const TessResultIterator *handle);
-
-TESS_API BOOL TessResultIteratorNext(TessResultIterator *handle,
-                                     TessPageIteratorLevel level);
-TESS_API char *TessResultIteratorGetUTF8Text(const TessResultIterator *handle,
-                                             TessPageIteratorLevel level);
-TESS_API float TessResultIteratorConfidence(const TessResultIterator *handle,
-                                            TessPageIteratorLevel level);
-TESS_API const char *TessResultIteratorWordRecognitionLanguage(
-    const TessResultIterator *handle);
-TESS_API const char *TessResultIteratorWordFontAttributes(
-    const TessResultIterator *handle, BOOL *is_bold, BOOL *is_italic,
-    BOOL *is_underlined, BOOL *is_monospace, BOOL *is_serif, BOOL *is_smallcaps,
-    int *pointsize, int *font_id);
-
-TESS_API BOOL
-TessResultIteratorWordIsFromDictionary(const TessResultIterator *handle);
-TESS_API BOOL TessResultIteratorWordIsNumeric(const TessResultIterator *handle);
-TESS_API BOOL
-TessResultIteratorSymbolIsSuperscript(const TessResultIterator *handle);
-TESS_API BOOL
-TessResultIteratorSymbolIsSubscript(const TessResultIterator *handle);
-TESS_API BOOL
-TessResultIteratorSymbolIsDropcap(const TessResultIterator *handle);
-
-TESS_API void TessChoiceIteratorDelete(TessChoiceIterator *handle);
-TESS_API BOOL TessChoiceIteratorNext(TessChoiceIterator *handle);
-TESS_API const char *TessChoiceIteratorGetUTF8Text(
-    const TessChoiceIterator *handle);
-TESS_API float TessChoiceIteratorConfidence(const TessChoiceIterator *handle);
-
-/* Progress monitor */
-
-TESS_API ETEXT_DESC *TessMonitorCreate();
-TESS_API void TessMonitorDelete(ETEXT_DESC *monitor);
-TESS_API void TessMonitorSetCancelFunc(ETEXT_DESC *monitor,
-                                       TessCancelFunc cancelFunc);
-TESS_API void TessMonitorSetCancelThis(ETEXT_DESC *monitor, void *cancelThis);
-TESS_API void *TessMonitorGetCancelThis(ETEXT_DESC *monitor);
-TESS_API void TessMonitorSetProgressFunc(ETEXT_DESC *monitor,
-                                         TessProgressFunc progressFunc);
-TESS_API int TessMonitorGetProgress(ETEXT_DESC *monitor);
-TESS_API void TessMonitorSetDeadlineMSecs(ETEXT_DESC *monitor, int deadline);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif // API_CAPI_H_
--- a/third_party/ocr/tesseract-ocr/kylin/mips64/include/tesseract/export.h
+++ b/third_party/ocr/tesseract-ocr/kylin/mips64/include/tesseract/export.h
@ -1,37 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// File:        export.h
-// Description: Place holder
-//
-// (C) Copyright 2006, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef TESSERACT_PLATFORM_H_
-#define TESSERACT_PLATFORM_H_
-
-#ifndef TESS_API
-#  if defined(_WIN32) || defined(__CYGWIN__)
-#    if defined(TESS_EXPORTS)
-#      define TESS_API __declspec(dllexport)
-#    elif defined(TESS_IMPORTS)
-#      define TESS_API __declspec(dllimport)
-#    else
-#      define TESS_API
-#    endif
-#  else
-#    if defined(TESS_EXPORTS) || defined(TESS_IMPORTS)
-#      define TESS_API __attribute__((visibility("default")))
-#    else
-#      define TESS_API
-#    endif
-#  endif
-#endif
-
-#endif // TESSERACT_PLATFORM_H_
--- a/third_party/ocr/tesseract-ocr/kylin/mips64/include/tesseract/ltrresultiterator.h
+++ b/third_party/ocr/tesseract-ocr/kylin/mips64/include/tesseract/ltrresultiterator.h
@ -1,235 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// File:        ltrresultiterator.h
-// Description: Iterator for tesseract results in strict left-to-right
-//              order that avoids using tesseract internal data structures.
-// Author:      Ray Smith
-//
-// (C) Copyright 2010, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H_
-#define TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H_
-
-#include "export.h"       // for TESS_API
-#include "pageiterator.h" // for PageIterator
-#include "publictypes.h"  // for PageIteratorLevel
-#include "unichar.h"      // for StrongScriptDirection
-
-namespace tesseract {
-
-class BLOB_CHOICE_IT;
-class PAGE_RES;
-class WERD_RES;
-
-class Tesseract;
-
-// Class to iterate over tesseract results, providing access to all levels
-// of the page hierarchy, without including any tesseract headers or having
-// to handle any tesseract structures.
-// WARNING! This class points to data held within the TessBaseAPI class, and
-// therefore can only be used while the TessBaseAPI class still exists and
-// has not been subjected to a call of Init, SetImage, Recognize, Clear, End
-// DetectOS, or anything else that changes the internal PAGE_RES.
-// See tesseract/publictypes.h for the definition of PageIteratorLevel.
-// See also base class PageIterator, which contains the bulk of the interface.
-// LTRResultIterator adds text-specific methods for access to OCR output.
-
-class TESS_API LTRResultIterator : public PageIterator {
-  friend class ChoiceIterator;
-
-public:
-  // page_res and tesseract come directly from the BaseAPI.
-  // The rectangle parameters are copied indirectly from the Thresholder,
-  // via the BaseAPI. They represent the coordinates of some rectangle in an
-  // original image (in top-left-origin coordinates) and therefore the top-left
-  // needs to be added to any output boxes in order to specify coordinates
-  // in the original image. See TessBaseAPI::SetRectangle.
-  // The scale and scaled_yres are in case the Thresholder scaled the image
-  // rectangle prior to thresholding. Any coordinates in tesseract's image
-  // must be divided by scale before adding (rect_left, rect_top).
-  // The scaled_yres indicates the effective resolution of the binary image
-  // that tesseract has been given by the Thresholder.
-  // After the constructor, Begin has already been called.
-  LTRResultIterator(PAGE_RES *page_res, Tesseract *tesseract, int scale,
-                    int scaled_yres, int rect_left, int rect_top,
-                    int rect_width, int rect_height);
-
-  ~LTRResultIterator() override;
-
-  // LTRResultIterators may be copied! This makes it possible to iterate over
-  // all the objects at a lower level, while maintaining an iterator to
-  // objects at a higher level. These constructors DO NOT CALL Begin, so
-  // iterations will continue from the location of src.
-  // TODO: For now the copy constructor and operator= only need the base class
-  // versions, but if new data members are added, don't forget to add them!
-
-  // ============= Moving around within the page ============.
-
-  // See PageIterator.
-
-  // ============= Accessing data ==============.
-
-  // Returns the null terminated UTF-8 encoded text string for the current
-  // object at the given level. Use delete [] to free after use.
-  char *GetUTF8Text(PageIteratorLevel level) const;
-
-  // Set the string inserted at the end of each text line. "\n" by default.
-  void SetLineSeparator(const char *new_line);
-
-  // Set the string inserted at the end of each paragraph. "\n" by default.
-  void SetParagraphSeparator(const char *new_para);
-
-  // Returns the mean confidence of the current object at the given level.
-  // The number should be interpreted as a percent probability. (0.0f-100.0f)
-  float Confidence(PageIteratorLevel level) const;
-
-  // ============= Functions that refer to words only ============.
-
-  // Returns the font attributes of the current word. If iterating at a higher
-  // level object than words, eg textlines, then this will return the
-  // attributes of the first word in that textline.
-  // The actual return value is a string representing a font name. It points
-  // to an internal table and SHOULD NOT BE DELETED. Lifespan is the same as
-  // the iterator itself, ie rendered invalid by various members of
-  // TessBaseAPI, including Init, SetImage, End or deleting the TessBaseAPI.
-  // Pointsize is returned in printers points (1/72 inch.)
-  const char *WordFontAttributes(bool *is_bold, bool *is_italic,
-                                 bool *is_underlined, bool *is_monospace,
-                                 bool *is_serif, bool *is_smallcaps,
-                                 int *pointsize, int *font_id) const;
-
-  // Return the name of the language used to recognize this word.
-  // On error, nullptr.  Do not delete this pointer.
-  const char *WordRecognitionLanguage() const;
-
-  // Return the overall directionality of this word.
-  StrongScriptDirection WordDirection() const;
-
-  // Returns true if the current word was found in a dictionary.
-  bool WordIsFromDictionary() const;
-
-  // Returns the number of blanks before the current word.
-  int BlanksBeforeWord() const;
-
-  // Returns true if the current word is numeric.
-  bool WordIsNumeric() const;
-
-  // Returns true if the word contains blamer information.
-  bool HasBlamerInfo() const;
-
-  // Returns the pointer to ParamsTrainingBundle stored in the BlamerBundle
-  // of the current word.
-  const void *GetParamsTrainingBundle() const;
-
-  // Returns a pointer to the string with blamer information for this word.
-  // Assumes that the word's blamer_bundle is not nullptr.
-  const char *GetBlamerDebug() const;
-
-  // Returns a pointer to the string with misadaption information for this word.
-  // Assumes that the word's blamer_bundle is not nullptr.
-  const char *GetBlamerMisadaptionDebug() const;
-
-  // Returns true if a truth string was recorded for the current word.
-  bool HasTruthString() const;
-
-  // Returns true if the given string is equivalent to the truth string for
-  // the current word.
-  bool EquivalentToTruth(const char *str) const;
-
-  // Returns a null terminated UTF-8 encoded truth string for the current word.
-  // Use delete [] to free after use.
-  char *WordTruthUTF8Text() const;
-
-  // Returns a null terminated UTF-8 encoded normalized OCR string for the
-  // current word. Use delete [] to free after use.
-  char *WordNormedUTF8Text() const;
-
-  // Returns a pointer to serialized choice lattice.
-  // Fills lattice_size with the number of bytes in lattice data.
-  const char *WordLattice(int *lattice_size) const;
-
-  // ============= Functions that refer to symbols only ============.
-
-  // Returns true if the current symbol is a superscript.
-  // If iterating at a higher level object than symbols, eg words, then
-  // this will return the attributes of the first symbol in that word.
-  bool SymbolIsSuperscript() const;
-  // Returns true if the current symbol is a subscript.
-  // If iterating at a higher level object than symbols, eg words, then
-  // this will return the attributes of the first symbol in that word.
-  bool SymbolIsSubscript() const;
-  // Returns true if the current symbol is a dropcap.
-  // If iterating at a higher level object than symbols, eg words, then
-  // this will return the attributes of the first symbol in that word.
-  bool SymbolIsDropcap() const;
-
-protected:
-  const char *line_separator_;
-  const char *paragraph_separator_;
-};
-
-// Class to iterate over the classifier choices for a single RIL_SYMBOL.
-class TESS_API ChoiceIterator {
-public:
-  // Construction is from a LTRResultIterator that points to the symbol of
-  // interest. The ChoiceIterator allows a one-shot iteration over the
-  // choices for this symbol and after that it is useless.
-  explicit ChoiceIterator(const LTRResultIterator &result_it);
-  ~ChoiceIterator();
-
-  // Moves to the next choice for the symbol and returns false if there
-  // are none left.
-  bool Next();
-
-  // ============= Accessing data ==============.
-
-  // Returns the null terminated UTF-8 encoded text string for the current
-  // choice.
-  // NOTE: Unlike LTRResultIterator::GetUTF8Text, the return points to an
-  // internal structure and should NOT be delete[]ed to free after use.
-  const char *GetUTF8Text() const;
-
-  // Returns the confidence of the current choice depending on the used language
-  // data. If only LSTM traineddata is used the value range is 0.0f - 1.0f. All
-  // choices for one symbol should roughly add up to 1.0f.
-  // If only traineddata of the legacy engine is used, the number should be
-  // interpreted as a percent probability. (0.0f-100.0f) In this case
-  // probabilities won't add up to 100. Each one stands on its own.
-  float Confidence() const;
-
-  // Returns a vector containing all timesteps, which belong to the currently
-  // selected symbol. A timestep is a vector containing pairs of symbols and
-  // floating point numbers. The number states the probability for the
-  // corresponding symbol.
-  std::vector<std::vector<std::pair<const char *, float>>> *Timesteps() const;
-
-private:
-  // clears the remaining spaces out of the results and adapt the probabilities
-  void filterSpaces();
-  // Pointer to the WERD_RES object owned by the API.
-  WERD_RES *word_res_;
-  // Iterator over the blob choices.
-  BLOB_CHOICE_IT *choice_it_;
-  std::vector<std::pair<const char *, float>> *LSTM_choices_ = nullptr;
-  std::vector<std::pair<const char *, float>>::iterator LSTM_choice_it_;
-
-  const int *tstep_index_;
-  // regulates the rating granularity
-  double rating_coefficient_;
-  // leading blanks
-  int blanks_before_word_;
-  // true when there is lstm engine related trained data
-  bool oemLSTM_;
-};
-
-} // namespace tesseract.
-
-#endif // TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H_
--- a/third_party/ocr/tesseract-ocr/kylin/mips64/include/tesseract/ocrclass.h
+++ b/third_party/ocr/tesseract-ocr/kylin/mips64/include/tesseract/ocrclass.h
@ -1,158 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-/**********************************************************************
- * File:        ocrclass.h
- * Description: Class definitions and constants for the OCR API.
- * Author:      Hewlett-Packard Co
- *
- * (C) Copyright 1996, Hewlett-Packard Co.
- ** Licensed under the Apache License, Version 2.0 (the "License");
- ** you may not use this file except in compliance with the License.
- ** You may obtain a copy of the License at
- ** http://www.apache.org/licenses/LICENSE-2.0
- ** Unless required by applicable law or agreed to in writing, software
- ** distributed under the License is distributed on an "AS IS" BASIS,
- ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- ** See the License for the specific language governing permissions and
- ** limitations under the License.
- *
- **********************************************************************/
-
-/**********************************************************************
- * This file contains typedefs for all the structures used by
- * the HP OCR interface.
- * The structures are designed to allow them to be used with any
- * structure alignment up to 8.
- **********************************************************************/
-
-#ifndef CCUTIL_OCRCLASS_H_
-#define CCUTIL_OCRCLASS_H_
-
-#include <chrono>
-#include <ctime>
-
-namespace tesseract {
-
-/**********************************************************************
- * EANYCODE_CHAR
- * Description of a single character. The character code is defined by
- * the character set of the current font.
- * Output text is sent as an array of these structures.
- * Spaces and line endings in the output are represented in the
- * structures of the surrounding characters. They are not directly
- * represented as characters.
- * The first character in a word has a positive value of blanks.
- * Missing information should be set to the defaults in the comments.
- * If word bounds are known, but not character bounds, then the top and
- * bottom of each character should be those of the word. The left of the
- * first and right of the last char in each word should be set. All other
- * lefts and rights should be set to -1.
- * If set, the values of right and bottom are left+width and top+height.
- * Most of the members come directly from the parameters to ocr_append_char.
- * The formatting member uses the enhancement parameter and combines the
- * line direction stuff into the top 3 bits.
- * The coding is 0=RL char, 1=LR char, 2=DR NL, 3=UL NL, 4=DR Para,
- * 5=UL Para, 6=TB char, 7=BT char. API users do not need to know what
- * the coding is, only that it is backwards compatible with the previous
- * version.
- **********************************************************************/
-
-struct EANYCODE_CHAR { /*single character */
-  // It should be noted that the format for char_code for version 2.0 and beyond
-  // is UTF8 which means that ASCII characters will come out as one structure
-  // but other characters will be returned in two or more instances of this
-  // structure with a single byte of the  UTF8 code in each, but each will have
-  // the same bounding box. Programs which want to handle languagues with
-  // different characters sets will need to handle extended characters
-  // appropriately, but *all* code needs to be prepared to receive UTF8 coded
-  // characters for characters such as bullet and fancy quotes.
-  uint16_t char_code; /*character itself */
-  int16_t left;       /*of char (-1) */
-  int16_t right;      /*of char (-1) */
-  int16_t top;        /*of char (-1) */
-  int16_t bottom;     /*of char (-1) */
-  int16_t font_index; /*what font (0) */
-  uint8_t confidence; /*0=perfect, 100=reject (0/100) */
-  uint8_t point_size; /*of char, 72=i inch, (10) */
-  int8_t blanks;      /*no of spaces before this char (1) */
-  uint8_t formatting; /*char formatting (0) */
-};
-
-/**********************************************************************
- * ETEXT_DESC
- * Description of the output of the OCR engine.
- * This structure is used as both a progress monitor and the final
- * output header, since it needs to be a valid progress monitor while
- * the OCR engine is storing its output to shared memory.
- * During progress, all the buffer info is -1.
- * Progress starts at 0 and increases to 100 during OCR. No other constraint.
- * Additionally the progress callback contains the bounding box of the word that
- * is currently being processed.
- * Every progress callback, the OCR engine must set ocr_alive to 1.
- * The HP side will set ocr_alive to 0. Repeated failure to reset
- * to 1 indicates that the OCR engine is dead.
- * If the cancel function is not null then it is called with the number of
- * user words found. If it returns true then operation is cancelled.
- **********************************************************************/
-class ETEXT_DESC;
-
-using CANCEL_FUNC = bool (*)(void *, int);
-using PROGRESS_FUNC = bool (*)(int, int, int, int, int);
-using PROGRESS_FUNC2 = bool (*)(ETEXT_DESC *, int, int, int, int);
-
-class ETEXT_DESC { // output header
-public:
-  int16_t count{0};    /// chars in this buffer(0)
-  int16_t progress{0}; /// percent complete increasing (0-100)
-  /** Progress monitor covers word recognition and it does not cover layout
-   * analysis.
-   * See Ray comment in https://github.com/tesseract-ocr/tesseract/pull/27 */
-  int8_t more_to_come{0};       /// true if not last
-  volatile int8_t ocr_alive{0}; /// ocr sets to 1, HP 0
-  int8_t err_code{0};           /// for errcode use
-  CANCEL_FUNC cancel{nullptr};  /// returns true to cancel
-  PROGRESS_FUNC progress_callback{
-      nullptr};                      /// called whenever progress increases
-  PROGRESS_FUNC2 progress_callback2; /// monitor-aware progress callback
-  void *cancel_this{nullptr};        /// this or other data for cancel
-  std::chrono::steady_clock::time_point end_time;
-  /// Time to stop. Expected to be set only
-  /// by call to set_deadline_msecs().
-  EANYCODE_CHAR text[1]{}; /// character data
-
-  ETEXT_DESC() : progress_callback2(&default_progress_func) {
-    end_time = std::chrono::time_point<std::chrono::steady_clock,
-                                       std::chrono::milliseconds>();
-  }
-
-  // Sets the end time to be deadline_msecs milliseconds from now.
-  void set_deadline_msecs(int32_t deadline_msecs) {
-    if (deadline_msecs > 0) {
-      end_time = std::chrono::steady_clock::now() +
-                 std::chrono::milliseconds(deadline_msecs);
-    }
-  }
-
-  // Returns false if we've not passed the end_time, or have not set a deadline.
-  bool deadline_exceeded() const {
-    if (end_time.time_since_epoch() ==
-        std::chrono::steady_clock::duration::zero()) {
-      return false;
-    }
-    auto now = std::chrono::steady_clock::now();
-    return (now > end_time);
-  }
-
-private:
-  static bool default_progress_func(ETEXT_DESC *ths, int left, int right,
-                                    int top, int bottom) {
-    if (ths->progress_callback != nullptr) {
-      return (*(ths->progress_callback))(ths->progress, left, right, top,
-                                         bottom);
-    }
-    return true;
-  }
-};
-
-} // namespace tesseract
-
-#endif // CCUTIL_OCRCLASS_H_
--- a/third_party/ocr/tesseract-ocr/kylin/mips64/include/tesseract/osdetect.h
+++ b/third_party/ocr/tesseract-ocr/kylin/mips64/include/tesseract/osdetect.h
@ -1,139 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// File:        osdetect.h
-// Description: Orientation and script detection.
-// Author:      Samuel Charron
-//              Ranjith Unnikrishnan
-//
-// (C) Copyright 2008, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef TESSERACT_CCMAIN_OSDETECT_H_
-#define TESSERACT_CCMAIN_OSDETECT_H_
-
-#include "export.h" // for TESS_API
-
-#include <vector> // for std::vector
-
-namespace tesseract {
-
-class BLOBNBOX;
-class BLOBNBOX_CLIST;
-class BLOB_CHOICE_LIST;
-class TO_BLOCK_LIST;
-class UNICHARSET;
-
-class Tesseract;
-
-// Max number of scripts in ICU + "NULL" + Japanese and Korean + Fraktur
-const int kMaxNumberOfScripts = 116 + 1 + 2 + 1;
-
-struct OSBestResult {
-  OSBestResult()
-      : orientation_id(0), script_id(0), sconfidence(0.0), oconfidence(0.0) {}
-  int orientation_id;
-  int script_id;
-  float sconfidence;
-  float oconfidence;
-};
-
-struct OSResults {
-  OSResults() : unicharset(nullptr) {
-    for (int i = 0; i < 4; ++i) {
-      for (int j = 0; j < kMaxNumberOfScripts; ++j) {
-        scripts_na[i][j] = 0;
-      }
-      orientations[i] = 0;
-    }
-  }
-  void update_best_orientation();
-  // Set the estimate of the orientation to the given id.
-  void set_best_orientation(int orientation_id);
-  // Update/Compute the best estimate of the script assuming the given
-  // orientation id.
-  void update_best_script(int orientation_id);
-  // Return the index of the script with the highest score for this orientation.
-  TESS_API int get_best_script(int orientation_id) const;
-  // Accumulate scores with given OSResults instance and update the best script.
-  void accumulate(const OSResults &osr);
-
-  // Print statistics.
-  void print_scores(void) const;
-  void print_scores(int orientation_id) const;
-
-  // Array holding scores for each orientation id [0,3].
-  // Orientation ids [0..3] map to [0, 270, 180, 90] degree orientations of the
-  // page respectively, where the values refer to the amount of clockwise
-  // rotation to be applied to the page for the text to be upright and readable.
-  float orientations[4];
-  // Script confidence scores for each of 4 possible orientations.
-  float scripts_na[4][kMaxNumberOfScripts];
-
-  UNICHARSET *unicharset;
-  OSBestResult best_result;
-};
-
-class OrientationDetector {
-public:
-  OrientationDetector(const std::vector<int> *allowed_scripts,
-                      OSResults *results);
-  bool detect_blob(BLOB_CHOICE_LIST *scores);
-  int get_orientation();
-
-private:
-  OSResults *osr_;
-  const std::vector<int> *allowed_scripts_;
-};
-
-class ScriptDetector {
-public:
-  ScriptDetector(const std::vector<int> *allowed_scripts, OSResults *osr,
-                 tesseract::Tesseract *tess);
-  void detect_blob(BLOB_CHOICE_LIST *scores);
-  bool must_stop(int orientation) const;
-
-private:
-  OSResults *osr_;
-  static const char *korean_script_;
-  static const char *japanese_script_;
-  static const char *fraktur_script_;
-  int korean_id_;
-  int japanese_id_;
-  int katakana_id_;
-  int hiragana_id_;
-  int han_id_;
-  int hangul_id_;
-  int latin_id_;
-  int fraktur_id_;
-  tesseract::Tesseract *tess_;
-  const std::vector<int> *allowed_scripts_;
-};
-
-int orientation_and_script_detection(const char *filename, OSResults *,
-                                     tesseract::Tesseract *);
-
-int os_detect(TO_BLOCK_LIST *port_blocks, OSResults *osr,
-              tesseract::Tesseract *tess);
-
-int os_detect_blobs(const std::vector<int> *allowed_scripts,
-                    BLOBNBOX_CLIST *blob_list, OSResults *osr,
-                    tesseract::Tesseract *tess);
-
-bool os_detect_blob(BLOBNBOX *bbox, OrientationDetector *o, ScriptDetector *s,
-                    OSResults *, tesseract::Tesseract *tess);
-
-// Helper method to convert an orientation index to its value in degrees.
-// The value represents the amount of clockwise rotation in degrees that must be
-// applied for the text to be upright (readable).
-TESS_API int OrientationIdToValue(const int &id);
-
-} // namespace tesseract
-
-#endif // TESSERACT_CCMAIN_OSDETECT_H_
--- a/third_party/ocr/tesseract-ocr/kylin/mips64/include/tesseract/pageiterator.h
+++ b/third_party/ocr/tesseract-ocr/kylin/mips64/include/tesseract/pageiterator.h
@ -1,364 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// File:        pageiterator.h
-// Description: Iterator for tesseract page structure that avoids using
-//              tesseract internal data structures.
-// Author:      Ray Smith
-//
-// (C) Copyright 2010, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef TESSERACT_CCMAIN_PAGEITERATOR_H_
-#define TESSERACT_CCMAIN_PAGEITERATOR_H_
-
-#include "export.h"
-#include "publictypes.h"
-
-struct Pix;
-struct Pta;
-
-namespace tesseract {
-
-struct BlamerBundle;
-class C_BLOB_IT;
-class PAGE_RES;
-class PAGE_RES_IT;
-class WERD;
-
-class Tesseract;
-
-/**
- * Class to iterate over tesseract page structure, providing access to all
- * levels of the page hierarchy, without including any tesseract headers or
- * having to handle any tesseract structures.
- * WARNING! This class points to data held within the TessBaseAPI class, and
- * therefore can only be used while the TessBaseAPI class still exists and
- * has not been subjected to a call of Init, SetImage, Recognize, Clear, End
- * DetectOS, or anything else that changes the internal PAGE_RES.
- * See tesseract/publictypes.h for the definition of PageIteratorLevel.
- * See also ResultIterator, derived from PageIterator, which adds in the
- * ability to access OCR output with text-specific methods.
- */
-
-class TESS_API PageIterator {
-public:
-  /**
-   * page_res and tesseract come directly from the BaseAPI.
-   * The rectangle parameters are copied indirectly from the Thresholder,
-   * via the BaseAPI. They represent the coordinates of some rectangle in an
-   * original image (in top-left-origin coordinates) and therefore the top-left
-   * needs to be added to any output boxes in order to specify coordinates
-   * in the original image. See TessBaseAPI::SetRectangle.
-   * The scale and scaled_yres are in case the Thresholder scaled the image
-   * rectangle prior to thresholding. Any coordinates in tesseract's image
-   * must be divided by scale before adding (rect_left, rect_top).
-   * The scaled_yres indicates the effective resolution of the binary image
-   * that tesseract has been given by the Thresholder.
-   * After the constructor, Begin has already been called.
-   */
-  PageIterator(PAGE_RES *page_res, Tesseract *tesseract, int scale,
-               int scaled_yres, int rect_left, int rect_top, int rect_width,
-               int rect_height);
-  virtual ~PageIterator();
-
-  /**
-   * Page/ResultIterators may be copied! This makes it possible to iterate over
-   * all the objects at a lower level, while maintaining an iterator to
-   * objects at a higher level. These constructors DO NOT CALL Begin, so
-   * iterations will continue from the location of src.
-   */
-  PageIterator(const PageIterator &src);
-  const PageIterator &operator=(const PageIterator &src);
-
-  /** Are we positioned at the same location as other? */
-  bool PositionedAtSameWord(const PAGE_RES_IT *other) const;
-
-  // ============= Moving around within the page ============.
-
-  /**
-   * Moves the iterator to point to the start of the page to begin an
-   * iteration.
-   */
-  virtual void Begin();
-
-  /**
-   * Moves the iterator to the beginning of the paragraph.
-   * This class implements this functionality by moving it to the zero indexed
-   * blob of the first (leftmost) word on the first row of the paragraph.
-   */
-  virtual void RestartParagraph();
-
-  /**
-   * Return whether this iterator points anywhere in the first textline of a
-   * paragraph.
-   */
-  bool IsWithinFirstTextlineOfParagraph() const;
-
-  /**
-   * Moves the iterator to the beginning of the text line.
-   * This class implements this functionality by moving it to the zero indexed
-   * blob of the first (leftmost) word of the row.
-   */
-  virtual void RestartRow();
-
-  /**
-   * Moves to the start of the next object at the given level in the
-   * page hierarchy, and returns false if the end of the page was reached.
-   * NOTE that RIL_SYMBOL will skip non-text blocks, but all other
-   * PageIteratorLevel level values will visit each non-text block once.
-   * Think of non text blocks as containing a single para, with a single line,
-   * with a single imaginary word.
-   * Calls to Next with different levels may be freely intermixed.
-   * This function iterates words in right-to-left scripts correctly, if
-   * the appropriate language has been loaded into Tesseract.
-   */
-  virtual bool Next(PageIteratorLevel level);
-
-  /**
-   * Returns true if the iterator is at the start of an object at the given
-   * level.
-   *
-   * For instance, suppose an iterator it is pointed to the first symbol of the
-   * first word of the third line of the second paragraph of the first block in
-   * a page, then:
-   *   it.IsAtBeginningOf(RIL_BLOCK) = false
-   *   it.IsAtBeginningOf(RIL_PARA) = false
-   *   it.IsAtBeginningOf(RIL_TEXTLINE) = true
-   *   it.IsAtBeginningOf(RIL_WORD) = true
-   *   it.IsAtBeginningOf(RIL_SYMBOL) = true
-   */
-  virtual bool IsAtBeginningOf(PageIteratorLevel level) const;
-
-  /**
-   * Returns whether the iterator is positioned at the last element in a
-   * given level. (e.g. the last word in a line, the last line in a block)
-   *
-   *     Here's some two-paragraph example
-   *   text.  It starts off innocuously
-   *   enough but quickly turns bizarre.
-   *     The author inserts a cornucopia
-   *   of words to guard against confused
-   *   references.
-   *
-   * Now take an iterator it pointed to the start of "bizarre."
-   *  it.IsAtFinalElement(RIL_PARA, RIL_SYMBOL) = false
-   *  it.IsAtFinalElement(RIL_PARA, RIL_WORD) = true
-   *  it.IsAtFinalElement(RIL_BLOCK, RIL_WORD) = false
-   */
-  virtual bool IsAtFinalElement(PageIteratorLevel level,
-                                PageIteratorLevel element) const;
-
-  /**
-   * Returns whether this iterator is positioned
-   *   before other:   -1
-   *   equal to other:  0
-   *   after other:     1
-   */
-  int Cmp(const PageIterator &other) const;
-
-  // ============= Accessing data ==============.
-  // Coordinate system:
-  // Integer coordinates are at the cracks between the pixels.
-  // The top-left corner of the top-left pixel in the image is at (0,0).
-  // The bottom-right corner of the bottom-right pixel in the image is at
-  // (width, height).
-  // Every bounding box goes from the top-left of the top-left contained
-  // pixel to the bottom-right of the bottom-right contained pixel, so
-  // the bounding box of the single top-left pixel in the image is:
-  // (0,0)->(1,1).
-  // If an image rectangle has been set in the API, then returned coordinates
-  // relate to the original (full) image, rather than the rectangle.
-
-  /**
-   * Controls what to include in a bounding box. Bounding boxes of all levels
-   * between RIL_WORD and RIL_BLOCK can include or exclude potential diacritics.
-   * Between layout analysis and recognition, it isn't known where all
-   * diacritics belong, so this control is used to include or exclude some
-   * diacritics that are above or below the main body of the word. In most cases
-   * where the placement is obvious, and after recognition, it doesn't make as
-   * much difference, as the diacritics will already be included in the word.
-   */
-  void SetBoundingBoxComponents(bool include_upper_dots,
-                                bool include_lower_dots) {
-    include_upper_dots_ = include_upper_dots;
-    include_lower_dots_ = include_lower_dots;
-  }
-
-  /**
-   * Returns the bounding rectangle of the current object at the given level.
-   * See comment on coordinate system above.
-   * Returns false if there is no such object at the current position.
-   * The returned bounding box is guaranteed to match the size and position
-   * of the image returned by GetBinaryImage, but may clip foreground pixels
-   * from a grey image. The padding argument to GetImage can be used to expand
-   * the image to include more foreground pixels. See GetImage below.
-   */
-  bool BoundingBox(PageIteratorLevel level, int *left, int *top, int *right,
-                   int *bottom) const;
-  bool BoundingBox(PageIteratorLevel level, int padding, int *left, int *top,
-                   int *right, int *bottom) const;
-  /**
-   * Returns the bounding rectangle of the object in a coordinate system of the
-   * working image rectangle having its origin at (rect_left_, rect_top_) with
-   * respect to the original image and is scaled by a factor scale_.
-   */
-  bool BoundingBoxInternal(PageIteratorLevel level, int *left, int *top,
-                           int *right, int *bottom) const;
-
-  /** Returns whether there is no object of a given level. */
-  bool Empty(PageIteratorLevel level) const;
-
-  /**
-   * Returns the type of the current block.
-   * See tesseract/publictypes.h for PolyBlockType.
-   */
-  PolyBlockType BlockType() const;
-
-  /**
-   * Returns the polygon outline of the current block. The returned Pta must
-   * be ptaDestroy-ed after use. Note that the returned Pta lists the vertices
-   * of the polygon, and the last edge is the line segment between the last
-   * point and the first point. nullptr will be returned if the iterator is
-   * at the end of the document or layout analysis was not used.
-   */
-  Pta *BlockPolygon() const;
-
-  /**
-   * Returns a binary image of the current object at the given level.
-   * The position and size match the return from BoundingBoxInternal, and so
-   * this could be upscaled with respect to the original input image.
-   * Use pixDestroy to delete the image after use.
-   */
-  Pix *GetBinaryImage(PageIteratorLevel level) const;
-
-  /**
-   * Returns an image of the current object at the given level in greyscale
-   * if available in the input. To guarantee a binary image use BinaryImage.
-   * NOTE that in order to give the best possible image, the bounds are
-   * expanded slightly over the binary connected component, by the supplied
-   * padding, so the top-left position of the returned image is returned
-   * in (left,top). These will most likely not match the coordinates
-   * returned by BoundingBox.
-   * If you do not supply an original image, you will get a binary one.
-   * Use pixDestroy to delete the image after use.
-   */
-  Pix *GetImage(PageIteratorLevel level, int padding, Pix *original_img,
-                int *left, int *top) const;
-
-  /**
-   * Returns the baseline of the current object at the given level.
-   * The baseline is the line that passes through (x1, y1) and (x2, y2).
-   * WARNING: with vertical text, baselines may be vertical!
-   * Returns false if there is no baseline at the current position.
-   */
-  bool Baseline(PageIteratorLevel level, int *x1, int *y1, int *x2,
-                int *y2) const;
-
-  // Returns the attributes of the current row.
-  void RowAttributes(float *row_height, float *descenders,
-                     float *ascenders) const;
-
-  /**
-   * Returns orientation for the block the iterator points to.
-   *   orientation, writing_direction, textline_order: see publictypes.h
-   *   deskew_angle: after rotating the block so the text orientation is
-   *                 upright, how many radians does one have to rotate the
-   *                 block anti-clockwise for it to be level?
-   *                   -Pi/4 <= deskew_angle <= Pi/4
-   */
-  void Orientation(tesseract::Orientation *orientation,
-                   tesseract::WritingDirection *writing_direction,
-                   tesseract::TextlineOrder *textline_order,
-                   float *deskew_angle) const;
-
-  /**
-   * Returns information about the current paragraph, if available.
-   *
-   *   justification -
-   *     LEFT if ragged right, or fully justified and script is left-to-right.
-   *     RIGHT if ragged left, or fully justified and script is right-to-left.
-   *     unknown if it looks like source code or we have very few lines.
-   *   is_list_item -
-   *     true if we believe this is a member of an ordered or unordered list.
-   *   is_crown -
-   *     true if the first line of the paragraph is aligned with the other
-   *     lines of the paragraph even though subsequent paragraphs have first
-   *     line indents.  This typically indicates that this is the continuation
-   *     of a previous paragraph or that it is the very first paragraph in
-   *     the chapter.
-   *   first_line_indent -
-   *     For LEFT aligned paragraphs, the first text line of paragraphs of
-   *     this kind are indented this many pixels from the left edge of the
-   *     rest of the paragraph.
-   *     for RIGHT aligned paragraphs, the first text line of paragraphs of
-   *     this kind are indented this many pixels from the right edge of the
-   *     rest of the paragraph.
-   *     NOTE 1: This value may be negative.
-   *     NOTE 2: if *is_crown == true, the first line of this paragraph is
-   *             actually flush, and first_line_indent is set to the "common"
-   *             first_line_indent for subsequent paragraphs in this block
-   *             of text.
-   */
-  void ParagraphInfo(tesseract::ParagraphJustification *justification,
-                     bool *is_list_item, bool *is_crown,
-                     int *first_line_indent) const;
-
-  // If the current WERD_RES (it_->word()) is not nullptr, sets the BlamerBundle
-  // of the current word to the given pointer (takes ownership of the pointer)
-  // and returns true.
-  // Can only be used when iterating on the word level.
-  bool SetWordBlamerBundle(BlamerBundle *blamer_bundle);
-
-protected:
-  /**
-   * Sets up the internal data for iterating the blobs of a new word, then
-   * moves the iterator to the given offset.
-   */
-  void BeginWord(int offset);
-
-  /** Pointer to the page_res owned by the API. */
-  PAGE_RES *page_res_;
-  /** Pointer to the Tesseract object owned by the API. */
-  Tesseract *tesseract_;
-  /**
-   * The iterator to the page_res_. Owned by this ResultIterator.
-   * A pointer just to avoid dragging in Tesseract includes.
-   */
-  PAGE_RES_IT *it_;
-  /**
-   * The current input WERD being iterated. If there is an output from OCR,
-   * then word_ is nullptr. Owned by the API
-   */
-  WERD *word_;
-  /** The length of the current word_. */
-  int word_length_;
-  /** The current blob index within the word. */
-  int blob_index_;
-  /**
-   * Iterator to the blobs within the word. If nullptr, then we are iterating
-   * OCR results in the box_word.
-   * Owned by this ResultIterator.
-   */
-  C_BLOB_IT *cblob_it_;
-  /** Control over what to include in bounding boxes. */
-  bool include_upper_dots_;
-  bool include_lower_dots_;
-  /** Parameters saved from the Thresholder. Needed to rebuild coordinates.*/
-  int scale_;
-  int scaled_yres_;
-  int rect_left_;
-  int rect_top_;
-  int rect_width_;
-  int rect_height_;
-};
-
-} // namespace tesseract.
-
-#endif // TESSERACT_CCMAIN_PAGEITERATOR_H_
--- a/third_party/ocr/tesseract-ocr/kylin/mips64/include/tesseract/publictypes.h
+++ b/third_party/ocr/tesseract-ocr/kylin/mips64/include/tesseract/publictypes.h
@ -1,281 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// File:        publictypes.h
-// Description: Types used in both the API and internally
-// Author:      Ray Smith
-//
-// (C) Copyright 2010, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef TESSERACT_CCSTRUCT_PUBLICTYPES_H_
-#define TESSERACT_CCSTRUCT_PUBLICTYPES_H_
-
-namespace tesseract {
-
-// This file contains types that are used both by the API and internally
-// to Tesseract. In order to decouple the API from Tesseract and prevent cyclic
-// dependencies, THIS FILE SHOULD NOT DEPEND ON ANY OTHER PART OF TESSERACT.
-// Restated: It is OK for low-level Tesseract files to include publictypes.h,
-// but not for the low-level tesseract code to include top-level API code.
-// This file should not use other Tesseract types, as that would drag
-// their includes into the API-level.
-
-/** Number of printers' points in an inch. The unit of the pointsize return. */
-constexpr int kPointsPerInch = 72;
-/**
- * Minimum believable resolution. Used as a default if there is no other
- * information, as it is safer to under-estimate than over-estimate.
- */
-constexpr int kMinCredibleResolution = 70;
-/** Maximum believable resolution.  */
-constexpr int kMaxCredibleResolution = 2400;
-/**
- * Ratio between median blob size and likely resolution. Used to estimate
- * resolution when none is provided. This is basically 1/usual text size in
- * inches.  */
-constexpr int kResolutionEstimationFactor = 10;
-
-/**
- * Possible types for a POLY_BLOCK or ColPartition.
- * Must be kept in sync with kPBColors in polyblk.cpp and PTIs*Type functions
- * below, as well as kPolyBlockNames in layout_test.cc.
- * Used extensively by ColPartition, and POLY_BLOCK.
- */
-enum PolyBlockType {
-  PT_UNKNOWN,         // Type is not yet known. Keep as the first element.
-  PT_FLOWING_TEXT,    // Text that lives inside a column.
-  PT_HEADING_TEXT,    // Text that spans more than one column.
-  PT_PULLOUT_TEXT,    // Text that is in a cross-column pull-out region.
-  PT_EQUATION,        // Partition belonging to an equation region.
-  PT_INLINE_EQUATION, // Partition has inline equation.
-  PT_TABLE,           // Partition belonging to a table region.
-  PT_VERTICAL_TEXT,   // Text-line runs vertically.
-  PT_CAPTION_TEXT,    // Text that belongs to an image.
-  PT_FLOWING_IMAGE,   // Image that lives inside a column.
-  PT_HEADING_IMAGE,   // Image that spans more than one column.
-  PT_PULLOUT_IMAGE,   // Image that is in a cross-column pull-out region.
-  PT_HORZ_LINE,       // Horizontal Line.
-  PT_VERT_LINE,       // Vertical Line.
-  PT_NOISE,           // Lies outside of any column.
-  PT_COUNT
-};
-
-/** Returns true if PolyBlockType is of horizontal line type */
-inline bool PTIsLineType(PolyBlockType type) {
-  return type == PT_HORZ_LINE || type == PT_VERT_LINE;
-}
-/** Returns true if PolyBlockType is of image type */
-inline bool PTIsImageType(PolyBlockType type) {
-  return type == PT_FLOWING_IMAGE || type == PT_HEADING_IMAGE ||
-         type == PT_PULLOUT_IMAGE;
-}
-/** Returns true if PolyBlockType is of text type */
-inline bool PTIsTextType(PolyBlockType type) {
-  return type == PT_FLOWING_TEXT || type == PT_HEADING_TEXT ||
-         type == PT_PULLOUT_TEXT || type == PT_TABLE ||
-         type == PT_VERTICAL_TEXT || type == PT_CAPTION_TEXT ||
-         type == PT_INLINE_EQUATION;
-}
-// Returns true if PolyBlockType is of pullout(inter-column) type
-inline bool PTIsPulloutType(PolyBlockType type) {
-  return type == PT_PULLOUT_IMAGE || type == PT_PULLOUT_TEXT;
-}
-
-/**
- *  +------------------+  Orientation Example:
- *  | 1 Aaaa Aaaa Aaaa |  ====================
- *  | Aaa aa aaa aa    |  To left is a diagram of some (1) English and
- *  | aaaaaa A aa aaa. |  (2) Chinese text and a (3) photo credit.
- *  |                2 |
- *  |   #######  c c C |  Upright Latin characters are represented as A and a.
- *  |   #######  c c c |  '<' represents a latin character rotated
- *  | < #######  c c c |      anti-clockwise 90 degrees.
- *  | < #######  c   c |
- *  | < #######  .   c |  Upright Chinese characters are represented C and c.
- *  | 3 #######      c |
- *  +------------------+  NOTA BENE: enum values here should match goodoc.proto
-
- * If you orient your head so that "up" aligns with Orientation,
- * then the characters will appear "right side up" and readable.
- *
- * In the example above, both the English and Chinese paragraphs are oriented
- * so their "up" is the top of the page (page up).  The photo credit is read
- * with one's head turned leftward ("up" is to page left).
- *
- * The values of this enum match the convention of Tesseract's osdetect.h
-*/
-enum Orientation {
-  ORIENTATION_PAGE_UP = 0,
-  ORIENTATION_PAGE_RIGHT = 1,
-  ORIENTATION_PAGE_DOWN = 2,
-  ORIENTATION_PAGE_LEFT = 3,
-};
-
-/**
- * The grapheme clusters within a line of text are laid out logically
- * in this direction, judged when looking at the text line rotated so that
- * its Orientation is "page up".
- *
- * For English text, the writing direction is left-to-right.  For the
- * Chinese text in the above example, the writing direction is top-to-bottom.
- */
-enum WritingDirection {
-  WRITING_DIRECTION_LEFT_TO_RIGHT = 0,
-  WRITING_DIRECTION_RIGHT_TO_LEFT = 1,
-  WRITING_DIRECTION_TOP_TO_BOTTOM = 2,
-};
-
-/**
- * The text lines are read in the given sequence.
- *
- * In English, the order is top-to-bottom.
- * In Chinese, vertical text lines are read right-to-left.  Mongolian is
- * written in vertical columns top to bottom like Chinese, but the lines
- * order left-to right.
- *
- * Note that only some combinations make sense.  For example,
- * WRITING_DIRECTION_LEFT_TO_RIGHT implies TEXTLINE_ORDER_TOP_TO_BOTTOM
- */
-enum TextlineOrder {
-  TEXTLINE_ORDER_LEFT_TO_RIGHT = 0,
-  TEXTLINE_ORDER_RIGHT_TO_LEFT = 1,
-  TEXTLINE_ORDER_TOP_TO_BOTTOM = 2,
-};
-
-/**
- * Possible modes for page layout analysis. These *must* be kept in order
- * of decreasing amount of layout analysis to be done, except for OSD_ONLY,
- * so that the inequality test macros below work.
- */
-enum PageSegMode {
-  PSM_OSD_ONLY = 0,      ///< Orientation and script detection only.
-  PSM_AUTO_OSD = 1,      ///< Automatic page segmentation with orientation and
-                         ///< script detection. (OSD)
-  PSM_AUTO_ONLY = 2,     ///< Automatic page segmentation, but no OSD, or OCR.
-  PSM_AUTO = 3,          ///< Fully automatic page segmentation, but no OSD.
-  PSM_SINGLE_COLUMN = 4, ///< Assume a single column of text of variable sizes.
-  PSM_SINGLE_BLOCK_VERT_TEXT = 5, ///< Assume a single uniform block of
-                                  ///< vertically aligned text.
-  PSM_SINGLE_BLOCK = 6, ///< Assume a single uniform block of text. (Default.)
-  PSM_SINGLE_LINE = 7,  ///< Treat the image as a single text line.
-  PSM_SINGLE_WORD = 8,  ///< Treat the image as a single word.
-  PSM_CIRCLE_WORD = 9,  ///< Treat the image as a single word in a circle.
-  PSM_SINGLE_CHAR = 10, ///< Treat the image as a single character.
-  PSM_SPARSE_TEXT =
-      11, ///< Find as much text as possible in no particular order.
-  PSM_SPARSE_TEXT_OSD = 12, ///< Sparse text with orientation and script det.
-  PSM_RAW_LINE = 13, ///< Treat the image as a single text line, bypassing
-                     ///< hacks that are Tesseract-specific.
-
-  PSM_COUNT ///< Number of enum entries.
-};
-
-/**
- * Inline functions that act on a PageSegMode to determine whether components of
- * layout analysis are enabled.
- * *Depend critically on the order of elements of PageSegMode.*
- * NOTE that arg is an int for compatibility with INT_PARAM.
- */
-inline bool PSM_OSD_ENABLED(int pageseg_mode) {
-  return pageseg_mode <= PSM_AUTO_OSD || pageseg_mode == PSM_SPARSE_TEXT_OSD;
-}
-inline bool PSM_ORIENTATION_ENABLED(int pageseg_mode) {
-  return pageseg_mode <= PSM_AUTO || pageseg_mode == PSM_SPARSE_TEXT_OSD;
-}
-inline bool PSM_COL_FIND_ENABLED(int pageseg_mode) {
-  return pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_AUTO;
-}
-inline bool PSM_SPARSE(int pageseg_mode) {
-  return pageseg_mode == PSM_SPARSE_TEXT || pageseg_mode == PSM_SPARSE_TEXT_OSD;
-}
-inline bool PSM_BLOCK_FIND_ENABLED(int pageseg_mode) {
-  return pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_SINGLE_COLUMN;
-}
-inline bool PSM_LINE_FIND_ENABLED(int pageseg_mode) {
-  return pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_SINGLE_BLOCK;
-}
-inline bool PSM_WORD_FIND_ENABLED(int pageseg_mode) {
-  return (pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_SINGLE_LINE) ||
-         pageseg_mode == PSM_SPARSE_TEXT || pageseg_mode == PSM_SPARSE_TEXT_OSD;
-}
-
-/**
- * enum of the elements of the page hierarchy, used in ResultIterator
- * to provide functions that operate on each level without having to
- * have 5x as many functions.
- */
-enum PageIteratorLevel {
-  RIL_BLOCK,    // Block of text/image/separator line.
-  RIL_PARA,     // Paragraph within a block.
-  RIL_TEXTLINE, // Line within a paragraph.
-  RIL_WORD,     // Word within a textline.
-  RIL_SYMBOL    // Symbol/character within a word.
-};
-
-/**
- * JUSTIFICATION_UNKNOWN
- *   The alignment is not clearly one of the other options.  This could happen
- *   for example if there are only one or two lines of text or the text looks
- *   like source code or poetry.
- *
- * NOTA BENE: Fully justified paragraphs (text aligned to both left and right
- *    margins) are marked by Tesseract with JUSTIFICATION_LEFT if their text
- *    is written with a left-to-right script and with JUSTIFICATION_RIGHT if
- *    their text is written in a right-to-left script.
- *
- * Interpretation for text read in vertical lines:
- *   "Left" is wherever the starting reading position is.
- *
- * JUSTIFICATION_LEFT
- *   Each line, except possibly the first, is flush to the same left tab stop.
- *
- * JUSTIFICATION_CENTER
- *   The text lines of the paragraph are centered about a line going
- *   down through their middle of the text lines.
- *
- * JUSTIFICATION_RIGHT
- *   Each line, except possibly the first, is flush to the same right tab stop.
- */
-enum ParagraphJustification {
-  JUSTIFICATION_UNKNOWN,
-  JUSTIFICATION_LEFT,
-  JUSTIFICATION_CENTER,
-  JUSTIFICATION_RIGHT,
-};
-
-/**
- * When Tesseract/Cube is initialized we can choose to instantiate/load/run
- * only the Tesseract part, only the Cube part or both along with the combiner.
- * The preference of which engine to use is stored in tessedit_ocr_engine_mode.
- *
- * ATTENTION: When modifying this enum, please make sure to make the
- * appropriate changes to all the enums mirroring it (e.g. OCREngine in
- * cityblock/workflow/detection/detection_storage.proto). Such enums will
- * mention the connection to OcrEngineMode in the comments.
- */
-enum OcrEngineMode {
-  OEM_TESSERACT_ONLY,          // Run Tesseract only - fastest; deprecated
-  OEM_LSTM_ONLY,               // Run just the LSTM line recognizer.
-  OEM_TESSERACT_LSTM_COMBINED, // Run the LSTM recognizer, but allow fallback
-                               // to Tesseract when things get difficult.
-                               // deprecated
-  OEM_DEFAULT,                 // Specify this mode when calling init_*(),
-                               // to indicate that any of the above modes
-                               // should be automatically inferred from the
-                               // variables in the language-specific config,
-                               // command-line configs, or if not specified
-                               // in any of the above should be set to the
-                               // default OEM_TESSERACT_ONLY.
-  OEM_COUNT                    // Number of OEMs
-};
-
-} // namespace tesseract.
-
-#endif // TESSERACT_CCSTRUCT_PUBLICTYPES_H_
--- a/third_party/ocr/tesseract-ocr/kylin/mips64/include/tesseract/renderer.h
+++ b/third_party/ocr/tesseract-ocr/kylin/mips64/include/tesseract/renderer.h
@ -1,311 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// File:        renderer.h
-// Description: Rendering interface to inject into TessBaseAPI
-//
-// (C) Copyright 2011, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef TESSERACT_API_RENDERER_H_
-#define TESSERACT_API_RENDERER_H_
-
-#include "export.h"
-
-// To avoid collision with other typenames include the ABSOLUTE MINIMUM
-// complexity of includes here. Use forward declarations wherever possible
-// and hide includes of complex types in baseapi.cpp.
-#include <cstdint>
-#include <string> // for std::string
-#include <vector> // for std::vector
-
-struct Pix;
-
-namespace tesseract {
-
-class TessBaseAPI;
-
-/**
- * Interface for rendering tesseract results into a document, such as text,
- * HOCR or pdf. This class is abstract. Specific classes handle individual
- * formats. This interface is then used to inject the renderer class into
- * tesseract when processing images.
- *
- * For simplicity implementing this with tesseract version 3.01,
- * the renderer contains document state that is cleared from document
- * to document just as the TessBaseAPI is. This way the base API can just
- * delegate its rendering functionality to injected renderers, and the
- * renderers can manage the associated state needed for the specific formats
- * in addition to the heuristics for producing it.
- */
-class TESS_API TessResultRenderer {
-public:
-  virtual ~TessResultRenderer();
-
-  // Takes ownership of pointer so must be new'd instance.
-  // Renderers aren't ordered, but appends the sequences of next parameter
-  // and existing next(). The renderers should be unique across both lists.
-  void insert(TessResultRenderer *next);
-
-  // Returns the next renderer or nullptr.
-  TessResultRenderer *next() {
-    return next_;
-  }
-
-  /**
-   * Starts a new document with the given title.
-   * This clears the contents of the output data.
-   * Title should use UTF-8 encoding.
-   */
-  bool BeginDocument(const char *title);
-
-  /**
-   * Adds the recognized text from the source image to the current document.
-   * Invalid if BeginDocument not yet called.
-   *
-   * Note that this API is a bit weird but is designed to fit into the
-   * current TessBaseAPI implementation where the api has lots of state
-   * information that we might want to add in.
-   */
-  bool AddImage(TessBaseAPI *api);
-
-  /**
-   * Finishes the document and finalizes the output data
-   * Invalid if BeginDocument not yet called.
-   */
-  bool EndDocument();
-
-  const char *file_extension() const {
-    return file_extension_;
-  }
-  const char *title() const {
-    return title_.c_str();
-  }
-
-  // Is everything fine? Otherwise something went wrong.
-  bool happy() const {
-    return happy_;
-  }
-
-  /**
-   * Returns the index of the last image given to AddImage
-   * (i.e. images are incremented whether the image succeeded or not)
-   *
-   * This is always defined. It means either the number of the
-   * current image, the last image ended, or in the completed document
-   * depending on when in the document lifecycle you are looking at it.
-   * Will return -1 if a document was never started.
-   */
-  int imagenum() const {
-    return imagenum_;
-  }
-
-protected:
-  /**
-   * Called by concrete classes.
-   *
-   * outputbase is the name of the output file excluding
-   * extension. For example, "/path/to/chocolate-chip-cookie-recipe"
-   *
-   * extension indicates the file extension to be used for output
-   * files. For example "pdf" will produce a .pdf file, and "hocr"
-   * will produce .hocr files.
-   */
-  TessResultRenderer(const char *outputbase, const char *extension);
-
-  // Hook for specialized handling in BeginDocument()
-  virtual bool BeginDocumentHandler();
-
-  // This must be overridden to render the OCR'd results
-  virtual bool AddImageHandler(TessBaseAPI *api) = 0;
-
-  // Hook for specialized handling in EndDocument()
-  virtual bool EndDocumentHandler();
-
-  // Renderers can call this to append '\0' terminated strings into
-  // the output string returned by GetOutput.
-  // This method will grow the output buffer if needed.
-  void AppendString(const char *s);
-
-  // Renderers can call this to append binary byte sequences into
-  // the output string returned by GetOutput. Note that s is not necessarily
-  // '\0' terminated (and can contain '\0' within it).
-  // This method will grow the output buffer if needed.
-  void AppendData(const char *s, int len);
-
-private:
-  TessResultRenderer *next_;   // Can link multiple renderers together
-  FILE *fout_;                 // output file pointer
-  const char *file_extension_; // standard extension for generated output
-  std::string title_;          // title of document being rendered
-  int imagenum_;               // index of last image added
-  bool happy_;                 // I get grumpy when the disk fills up, etc.
-};
-
-/**
- * Renders tesseract output into a plain UTF-8 text string
- */
-class TESS_API TessTextRenderer : public TessResultRenderer {
-public:
-  explicit TessTextRenderer(const char *outputbase);
-
-protected:
-  bool AddImageHandler(TessBaseAPI *api) override;
-};
-
-/**
- * Renders tesseract output into an hocr text string
- */
-class TESS_API TessHOcrRenderer : public TessResultRenderer {
-public:
-  explicit TessHOcrRenderer(const char *outputbase, bool font_info);
-  explicit TessHOcrRenderer(const char *outputbase);
-
-protected:
-  bool BeginDocumentHandler() override;
-  bool AddImageHandler(TessBaseAPI *api) override;
-  bool EndDocumentHandler() override;
-
-private:
-  bool font_info_; // whether to print font information
-};
-
-/**
- * Renders tesseract output into an alto text string
- */
-class TESS_API TessAltoRenderer : public TessResultRenderer {
-public:
-  explicit TessAltoRenderer(const char *outputbase);
-
-protected:
-  bool BeginDocumentHandler() override;
-  bool AddImageHandler(TessBaseAPI *api) override;
-  bool EndDocumentHandler() override;
-
-private:
-  bool begin_document;
-};
-
-/**
- * Renders Tesseract output into a TSV string
- */
-class TESS_API TessTsvRenderer : public TessResultRenderer {
-public:
-  explicit TessTsvRenderer(const char *outputbase, bool font_info);
-  explicit TessTsvRenderer(const char *outputbase);
-
-protected:
-  bool BeginDocumentHandler() override;
-  bool AddImageHandler(TessBaseAPI *api) override;
-  bool EndDocumentHandler() override;
-
-private:
-  bool font_info_; // whether to print font information
-};
-
-/**
- * Renders tesseract output into searchable PDF
- */
-class TESS_API TessPDFRenderer : public TessResultRenderer {
-public:
-  // datadir is the location of the TESSDATA. We need it because
-  // we load a custom PDF font from this location.
-  TessPDFRenderer(const char *outputbase, const char *datadir,
-                  bool textonly = false);
-
-protected:
-  bool BeginDocumentHandler() override;
-  bool AddImageHandler(TessBaseAPI *api) override;
-  bool EndDocumentHandler() override;
-
-private:
-  // We don't want to have every image in memory at once,
-  // so we store some metadata as we go along producing
-  // PDFs one page at a time. At the end, that metadata is
-  // used to make everything that isn't easily handled in a
-  // streaming fashion.
-  long int obj_;                  // counter for PDF objects
-  std::vector<uint64_t> offsets_; // offset of every PDF object in bytes
-  std::vector<long int> pages_;   // object number for every /Page object
-  std::string datadir_;           // where to find the custom font
-  bool textonly_;                 // skip images if set
-  // Bookkeeping only. DIY = Do It Yourself.
-  void AppendPDFObjectDIY(size_t objectsize);
-  // Bookkeeping + emit data.
-  void AppendPDFObject(const char *data);
-  // Create the /Contents object for an entire page.
-  char *GetPDFTextObjects(TessBaseAPI *api, double width, double height);
-  // Turn an image into a PDF object. Only transcode if we have to.
-  static bool imageToPDFObj(Pix *pix, const char *filename, long int objnum,
-                            char **pdf_object, long int *pdf_object_size,
-                            int jpg_quality);
-};
-
-/**
- * Renders tesseract output into a plain UTF-8 text string
- */
-class TESS_API TessUnlvRenderer : public TessResultRenderer {
-public:
-  explicit TessUnlvRenderer(const char *outputbase);
-
-protected:
-  bool AddImageHandler(TessBaseAPI *api) override;
-};
-
-/**
- * Renders tesseract output into a plain UTF-8 text string for LSTMBox
- */
-class TESS_API TessLSTMBoxRenderer : public TessResultRenderer {
-public:
-  explicit TessLSTMBoxRenderer(const char *outputbase);
-
-protected:
-  bool AddImageHandler(TessBaseAPI *api) override;
-};
-
-/**
- * Renders tesseract output into a plain UTF-8 text string
- */
-class TESS_API TessBoxTextRenderer : public TessResultRenderer {
-public:
-  explicit TessBoxTextRenderer(const char *outputbase);
-
-protected:
-  bool AddImageHandler(TessBaseAPI *api) override;
-};
-
-/**
- * Renders tesseract output into a plain UTF-8 text string in WordStr format
- */
-class TESS_API TessWordStrBoxRenderer : public TessResultRenderer {
-public:
-  explicit TessWordStrBoxRenderer(const char *outputbase);
-
-protected:
-  bool AddImageHandler(TessBaseAPI *api) override;
-};
-
-#ifndef DISABLED_LEGACY_ENGINE
-
-/**
- * Renders tesseract output into an osd text string
- */
-class TESS_API TessOsdRenderer : public TessResultRenderer {
-public:
-  explicit TessOsdRenderer(const char *outputbase);
-
-protected:
-  bool AddImageHandler(TessBaseAPI *api) override;
-};
-
-#endif // ndef DISABLED_LEGACY_ENGINE
-
-} // namespace tesseract.
-
-#endif // TESSERACT_API_RENDERER_H_
--- a/third_party/ocr/tesseract-ocr/kylin/mips64/include/tesseract/resultiterator.h
+++ b/third_party/ocr/tesseract-ocr/kylin/mips64/include/tesseract/resultiterator.h
@ -1,250 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// File:        resultiterator.h
-// Description: Iterator for tesseract results that is capable of
-//              iterating in proper reading order over Bi Directional
-//              (e.g. mixed Hebrew and English) text.
-// Author:      David Eger
-//
-// (C) Copyright 2011, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef TESSERACT_CCMAIN_RESULT_ITERATOR_H_
-#define TESSERACT_CCMAIN_RESULT_ITERATOR_H_
-
-#include "export.h"            // for TESS_API, TESS_LOCAL
-#include "ltrresultiterator.h" // for LTRResultIterator
-#include "publictypes.h"       // for PageIteratorLevel
-#include "unichar.h"           // for StrongScriptDirection
-
-#include <set>    // for std::pair
-#include <vector> // for std::vector
-
-namespace tesseract {
-
-class TESS_API ResultIterator : public LTRResultIterator {
-public:
-  static ResultIterator *StartOfParagraph(const LTRResultIterator &resit);
-
-  /**
-   * ResultIterator is copy constructible!
-   * The default copy constructor works just fine for us.
-   */
-  ~ResultIterator() override = default;
-
-  // ============= Moving around within the page ============.
-  /**
-   * Moves the iterator to point to the start of the page to begin
-   * an iteration.
-   */
-  void Begin() override;
-
-  /**
-   * Moves to the start of the next object at the given level in the
-   * page hierarchy in the appropriate reading order and returns false if
-   * the end of the page was reached.
-   * NOTE that RIL_SYMBOL will skip non-text blocks, but all other
-   * PageIteratorLevel level values will visit each non-text block once.
-   * Think of non text blocks as containing a single para, with a single line,
-   * with a single imaginary word.
-   * Calls to Next with different levels may be freely intermixed.
-   * This function iterates words in right-to-left scripts correctly, if
-   * the appropriate language has been loaded into Tesseract.
-   */
-  bool Next(PageIteratorLevel level) override;
-
-  /**
-   * IsAtBeginningOf() returns whether we're at the logical beginning of the
-   * given level.  (as opposed to ResultIterator's left-to-right top-to-bottom
-   * order).  Otherwise, this acts the same as PageIterator::IsAtBeginningOf().
-   * For a full description, see pageiterator.h
-   */
-  bool IsAtBeginningOf(PageIteratorLevel level) const override;
-
-  /**
-   * Implement PageIterator's IsAtFinalElement correctly in a BiDi context.
-   * For instance, IsAtFinalElement(RIL_PARA, RIL_WORD) returns whether we
-   * point at the last word in a paragraph.  See PageIterator for full comment.
-   */
-  bool IsAtFinalElement(PageIteratorLevel level,
-                        PageIteratorLevel element) const override;
-
-  // ============= Functions that refer to words only ============.
-  // Returns the number of blanks before the current word.
-  int BlanksBeforeWord() const;
-
-  // ============= Accessing data ==============.
-
-  /**
-   * Returns the null terminated UTF-8 encoded text string for the current
-   * object at the given level. Use delete [] to free after use.
-   */
-  virtual char *GetUTF8Text(PageIteratorLevel level) const;
-
-  /**
-   * Returns the LSTM choices for every LSTM timestep for the current word.
-   */
-  virtual std::vector<std::vector<std::vector<std::pair<const char *, float>>>>
-      *GetRawLSTMTimesteps() const;
-  virtual std::vector<std::vector<std::pair<const char *, float>>>
-      *GetBestLSTMSymbolChoices() const;
-
-  /**
-   * Return whether the current paragraph's dominant reading direction
-   * is left-to-right (as opposed to right-to-left).
-   */
-  bool ParagraphIsLtr() const;
-
-  // ============= Exposed only for testing =============.
-
-  /**
-   * Yields the reading order as a sequence of indices and (optional)
-   * meta-marks for a set of words (given left-to-right).
-   * The meta marks are passed as negative values:
-   *   kMinorRunStart  Start of minor direction text.
-   *   kMinorRunEnd    End of minor direction text.
-   *   kComplexWord    The next indexed word contains both left-to-right and
-   *                    right-to-left characters and was treated as neutral.
-   *
-   * For example, suppose we have five words in a text line,
-   * indexed [0,1,2,3,4] from the leftmost side of the text line.
-   * The following are all believable reading_orders:
-   *
-   * Left-to-Right (in ltr paragraph):
-   *     { 0, 1, 2, 3, 4 }
-   * Left-to-Right (in rtl paragraph):
-   *     { kMinorRunStart, 0, 1, 2, 3, 4, kMinorRunEnd }
-   * Right-to-Left (in rtl paragraph):
-   *     { 4, 3, 2, 1, 0 }
-   * Left-to-Right except for an RTL phrase in words 2, 3 in an ltr paragraph:
-   *     { 0, 1, kMinorRunStart, 3, 2, kMinorRunEnd, 4 }
-   */
-  static void CalculateTextlineOrder(
-      bool paragraph_is_ltr,
-      const std::vector<StrongScriptDirection> &word_dirs,
-      std::vector<int> *reading_order);
-
-  static const int kMinorRunStart;
-  static const int kMinorRunEnd;
-  static const int kComplexWord;
-
-protected:
-  /**
-   * We presume the data associated with the given iterator will outlive us.
-   * NB: This is private because it does something that is non-obvious:
-   *   it resets to the beginning of the paragraph instead of staying wherever
-   *   resit might have pointed.
-   */
-  explicit ResultIterator(const LTRResultIterator &resit);
-
-private:
-  /**
-   * Calculates the current paragraph's dominant writing direction.
-   * Typically, members should use current_paragraph_ltr_ instead.
-   */
-  bool CurrentParagraphIsLtr() const;
-
-  /**
-   * Returns word indices as measured from resit->RestartRow() = index 0
-   * for the reading order of words within a textline given an iterator
-   * into the middle of the text line.
-   * In addition to non-negative word indices, the following negative values
-   * may be inserted:
-   *   kMinorRunStart  Start of minor direction text.
-   *   kMinorRunEnd    End of minor direction text.
-   *   kComplexWord    The previous word contains both left-to-right and
-   *                   right-to-left characters and was treated as neutral.
-   */
-  void CalculateTextlineOrder(bool paragraph_is_ltr,
-                              const LTRResultIterator &resit,
-                              std::vector<int> *indices) const;
-  /** Same as above, but the caller's ssd gets filled in if ssd != nullptr. */
-  void CalculateTextlineOrder(bool paragraph_is_ltr,
-                              const LTRResultIterator &resit,
-                              std::vector<StrongScriptDirection> *ssd,
-                              std::vector<int> *indices) const;
-
-  /**
-   * What is the index of the current word in a strict left-to-right reading
-   * of the row?
-   */
-  int LTRWordIndex() const;
-
-  /**
-   * Given an iterator pointing at a word, returns the logical reading order
-   * of blob indices for the word.
-   */
-  void CalculateBlobOrder(std::vector<int> *blob_indices) const;
-
-  /** Precondition: current_paragraph_is_ltr_ is set. */
-  void MoveToLogicalStartOfTextline();
-
-  /**
-   * Precondition: current_paragraph_is_ltr_ and in_minor_direction_
-   * are set.
-   */
-  void MoveToLogicalStartOfWord();
-
-  /** Are we pointing at the final (reading order) symbol of the word? */
-  bool IsAtFinalSymbolOfWord() const;
-
-  /** Are we pointing at the first (reading order) symbol of the word? */
-  bool IsAtFirstSymbolOfWord() const;
-
-  /**
-   * Append any extra marks that should be appended to this word when printed.
-   * Mostly, these are Unicode BiDi control characters.
-   */
-  void AppendSuffixMarks(std::string *text) const;
-
-  /** Appends the current word in reading order to the given buffer.*/
-  void AppendUTF8WordText(std::string *text) const;
-
-  /**
-   * Appends the text of the current text line, *assuming this iterator is
-   * positioned at the beginning of the text line*  This function
-   * updates the iterator to point to the first position past the text line.
-   * Each textline is terminated in a single newline character.
-   * If the textline ends a paragraph, it gets a second terminal newline.
-   */
-  void IterateAndAppendUTF8TextlineText(std::string *text);
-
-  /**
-   * Appends the text of the current paragraph in reading order
-   * to the given buffer.
-   * Each textline is terminated in a single newline character, and the
-   * paragraph gets an extra newline at the end.
-   */
-  void AppendUTF8ParagraphText(std::string *text) const;
-
-  /** Returns whether the bidi_debug flag is set to at least min_level. */
-  bool BidiDebug(int min_level) const;
-
-  bool current_paragraph_is_ltr_;
-
-  /**
-   * Is the currently pointed-at character at the beginning of
-   * a minor-direction run?
-   */
-  bool at_beginning_of_minor_run_;
-
-  /** Is the currently pointed-at character in a minor-direction sequence? */
-  bool in_minor_direction_;
-
-  /**
-   * Should detected inter-word spaces be preserved, or "compressed" to a single
-   * space character (default behavior).
-   */
-  bool preserve_interword_spaces_;
-};
-
-} // namespace tesseract.
-
-#endif // TESSERACT_CCMAIN_RESULT_ITERATOR_H_
--- a/third_party/ocr/tesseract-ocr/kylin/mips64/include/tesseract/unichar.h
+++ b/third_party/ocr/tesseract-ocr/kylin/mips64/include/tesseract/unichar.h
@ -1,174 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// File:        unichar.h
-// Description: Unicode character/ligature class.
-// Author:      Ray Smith
-//
-// (C) Copyright 2006, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef TESSERACT_CCUTIL_UNICHAR_H_
-#define TESSERACT_CCUTIL_UNICHAR_H_
-
-#include "export.h"
-
-#include <memory.h>
-#include <cstring>
-#include <string>
-#include <vector>
-
-namespace tesseract {
-
-// Maximum number of characters that can be stored in a UNICHAR. Must be
-// at least 4. Must not exceed 31 without changing the coding of length.
-#define UNICHAR_LEN 30
-
-// A UNICHAR_ID is the unique id of a unichar.
-using UNICHAR_ID = int;
-
-// A variable to indicate an invalid or uninitialized unichar id.
-static const int INVALID_UNICHAR_ID = -1;
-// A special unichar that corresponds to INVALID_UNICHAR_ID.
-static const char INVALID_UNICHAR[] = "__INVALID_UNICHAR__";
-
-enum StrongScriptDirection {
-  DIR_NEUTRAL = 0,       // Text contains only neutral characters.
-  DIR_LEFT_TO_RIGHT = 1, // Text contains no Right-to-Left characters.
-  DIR_RIGHT_TO_LEFT = 2, // Text contains no Left-to-Right characters.
-  DIR_MIX = 3,           // Text contains a mixture of left-to-right
-                         // and right-to-left characters.
-};
-
-using char32 = signed int;
-
-// The UNICHAR class holds a single classification result. This may be
-// a single Unicode character (stored as between 1 and 4 utf8 bytes) or
-// multiple Unicode characters representing the NFKC expansion of a ligature
-// such as fi, ffl etc. These are also stored as utf8.
-class TESS_API UNICHAR {
-public:
-  UNICHAR() {
-    memset(chars, 0, UNICHAR_LEN);
-  }
-
-  // Construct from a utf8 string. If len<0 then the string is null terminated.
-  // If the string is too long to fit in the UNICHAR then it takes only what
-  // will fit.
-  UNICHAR(const char *utf8_str, int len);
-
-  // Construct from a single UCS4 character.
-  explicit UNICHAR(int unicode);
-
-  // Default copy constructor and operator= are OK.
-
-  // Get the first character as UCS-4.
-  int first_uni() const;
-
-  // Get the length of the UTF8 string.
-  int utf8_len() const {
-    int len = chars[UNICHAR_LEN - 1];
-    return len >= 0 && len < UNICHAR_LEN ? len : UNICHAR_LEN;
-  }
-
-  // Get a UTF8 string, but NOT nullptr terminated.
-  const char *utf8() const {
-    return chars;
-  }
-
-  // Get a terminated UTF8 string: Must delete[] it after use.
-  char *utf8_str() const;
-
-  // Get the number of bytes in the first character of the given utf8 string.
-  static int utf8_step(const char *utf8_str);
-
-  // A class to simplify iterating over and accessing elements of a UTF8
-  // string. Note that unlike the UNICHAR class, const_iterator does NOT COPY or
-  // take ownership of the underlying byte array. It also does not permit
-  // modification of the array (as the name suggests).
-  //
-  // Example:
-  //   for (UNICHAR::const_iterator it = UNICHAR::begin(str, str_len);
-  //        it != UNICHAR::end(str, len);
-  //        ++it) {
-  //     printf("UCS-4 symbol code = %d\n", *it);
-  //     char buf[5];
-  //     int char_len = it.get_utf8(buf); buf[char_len] = '\0';
-  //     printf("Char = %s\n", buf);
-  //   }
-  class TESS_API const_iterator {
-    using CI = const_iterator;
-
-  public:
-    // Step to the next UTF8 character.
-    // If the current position is at an illegal UTF8 character, then print an
-    // error message and step by one byte. If the current position is at a
-    // nullptr value, don't step past it.
-    const_iterator &operator++();
-
-    // Return the UCS-4 value at the current position.
-    // If the current position is at an illegal UTF8 value, return a single
-    // space character.
-    int operator*() const;
-
-    // Store the UTF-8 encoding of the current codepoint into buf, which must be
-    // at least 4 bytes long. Return the number of bytes written.
-    // If the current position is at an illegal UTF8 value, writes a single
-    // space character and returns 1.
-    // Note that this method does not null-terminate the buffer.
-    int get_utf8(char *buf) const;
-    // Returns the number of bytes of the current codepoint. Returns 1 if the
-    // current position is at an illegal UTF8 value.
-    int utf8_len() const;
-    // Returns true if the UTF-8 encoding at the current position is legal.
-    bool is_legal() const;
-
-    // Return the pointer into the string at the current position.
-    const char *utf8_data() const {
-      return it_;
-    }
-
-    // Iterator equality operators.
-    friend bool operator==(const CI &lhs, const CI &rhs) {
-      return lhs.it_ == rhs.it_;
-    }
-    friend bool operator!=(const CI &lhs, const CI &rhs) {
-      return !(lhs == rhs);
-    }
-
-  private:
-    friend class UNICHAR;
-    explicit const_iterator(const char *it) : it_(it) {}
-
-    const char *it_; // Pointer into the string.
-  };
-
-  // Create a start/end iterator pointing to a string. Note that these methods
-  // are static and do NOT create a copy or take ownership of the underlying
-  // array.
-  static const_iterator begin(const char *utf8_str, int byte_length);
-  static const_iterator end(const char *utf8_str, int byte_length);
-
-  // Converts a utf-8 string to a vector of unicodes.
-  // Returns an empty vector if the input contains invalid UTF-8.
-  static std::vector<char32> UTF8ToUTF32(const char *utf8_str);
-  // Converts a vector of unicodes to a utf8 string.
-  // Returns an empty string if the input contains an invalid unicode.
-  static std::string UTF32ToUTF8(const std::vector<char32> &str32);
-
-private:
-  // A UTF-8 representation of 1 or more Unicode characters.
-  // The last element (chars[UNICHAR_LEN - 1]) is a length if
-  // its value < UNICHAR_LEN, otherwise it is a genuine character.
-  char chars[UNICHAR_LEN]{};
-};
-
-} // namespace tesseract
-
-#endif // TESSERACT_CCUTIL_UNICHAR_H_
--- a/third_party/ocr/tesseract-ocr/kylin/mips64/include/tesseract/version.h
+++ b/third_party/ocr/tesseract-ocr/kylin/mips64/include/tesseract/version.h
@ -1,34 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// File:        version.h
-// Description: Version information
-//
-// (C) Copyright 2018, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef TESSERACT_API_VERSION_H_
-#define TESSERACT_API_VERSION_H_
-
-// clang-format off
-
-#define TESSERACT_MAJOR_VERSION @GENERIC_MAJOR_VERSION@
-#define TESSERACT_MINOR_VERSION @GENERIC_MINOR_VERSION@
-#define TESSERACT_MICRO_VERSION @GENERIC_MICRO_VERSION@
-
-#define TESSERACT_VERSION          \
-  (TESSERACT_MAJOR_VERSION << 16 | \
-   TESSERACT_MINOR_VERSION <<  8 | \
-   TESSERACT_MICRO_VERSION)
-
-#define TESSERACT_VERSION_STR "@PACKAGE_VERSION@"
-
-// clang-format on
-
-#endif // TESSERACT_API_VERSION_H_
--- a/third_party/ocr/tesseract-ocr/uos/aarch64/include/tesseract/baseapi.h
+++ b/third_party/ocr/tesseract-ocr/uos/aarch64/include/tesseract/baseapi.h
@ -1,812 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// File:        baseapi.h
-// Description: Simple API for calling tesseract.
-// Author:      Ray Smith
-//
-// (C) Copyright 2006, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef TESSERACT_API_BASEAPI_H_
-#define TESSERACT_API_BASEAPI_H_
-
-#ifdef HAVE_CONFIG_H
-#  include "config_auto.h" // DISABLED_LEGACY_ENGINE
-#endif
-
-#include "export.h"
-#include "pageiterator.h"
-#include "publictypes.h"
-#include "resultiterator.h"
-#include "unichar.h"
-
-#include "version.h"
-
-#include <cstdio>
-#include <vector> // for std::vector
-
-struct Pix;
-struct Pixa;
-struct Boxa;
-
-namespace tesseract {
-
-class PAGE_RES;
-class ParagraphModel;
-class BLOCK_LIST;
-class ETEXT_DESC;
-struct OSResults;
-class UNICHARSET;
-
-class Dawg;
-class Dict;
-class EquationDetect;
-class PageIterator;
-class ImageThresholder;
-class LTRResultIterator;
-class ResultIterator;
-class MutableIterator;
-class TessResultRenderer;
-class Tesseract;
-
-// Function to read a std::vector<char> from a whole file.
-// Returns false on failure.
-using FileReader = bool (*)(const char *filename, std::vector<char> *data);
-
-using DictFunc = int (Dict::*)(void *, const UNICHARSET &, UNICHAR_ID,
-                               bool) const;
-using ProbabilityInContextFunc = double (Dict::*)(const char *, const char *,
-                                                  int, const char *, int);
-
-/**
- * Base class for all tesseract APIs.
- * Specific classes can add ability to work on different inputs or produce
- * different outputs.
- * This class is mostly an interface layer on top of the Tesseract instance
- * class to hide the data types so that users of this class don't have to
- * include any other Tesseract headers.
- */
-class TESS_API TessBaseAPI {
-public:
-  TessBaseAPI();
-  virtual ~TessBaseAPI();
-  // Copy constructor and assignment operator are currently unsupported.
-  TessBaseAPI(TessBaseAPI const &) = delete;
-  TessBaseAPI &operator=(TessBaseAPI const &) = delete;
-
-  /**
-   * Returns the version identifier as a static string. Do not delete.
-   */
-  static const char *Version();
-
-  /**
-   * If compiled with OpenCL AND an available OpenCL
-   * device is deemed faster than serial code, then
-   * "device" is populated with the cl_device_id
-   * and returns sizeof(cl_device_id)
-   * otherwise *device=nullptr and returns 0.
-   */
-  static size_t getOpenCLDevice(void **device);
-
-  /**
-   * Set the name of the input file. Needed for training and
-   * reading a UNLV zone file, and for searchable PDF output.
-   */
-  void SetInputName(const char *name);
-  /**
-   * These functions are required for searchable PDF output.
-   * We need our hands on the input file so that we can include
-   * it in the PDF without transcoding. If that is not possible,
-   * we need the original image. Finally, resolution metadata
-   * is stored in the PDF so we need that as well.
-   */
-  const char *GetInputName();
-  // Takes ownership of the input pix.
-  void SetInputImage(Pix *pix);
-  Pix *GetInputImage();
-  int GetSourceYResolution();
-  const char *GetDatapath();
-
-  /** Set the name of the bonus output files. Needed only for debugging. */
-  void SetOutputName(const char *name);
-
-  /**
-   * Set the value of an internal "parameter."
-   * Supply the name of the parameter and the value as a string, just as
-   * you would in a config file.
-   * Returns false if the name lookup failed.
-   * Eg SetVariable("tessedit_char_blacklist", "xyz"); to ignore x, y and z.
-   * Or SetVariable("classify_bln_numeric_mode", "1"); to set numeric-only mode.
-   * SetVariable may be used before Init, but settings will revert to
-   * defaults on End().
-   *
-   * Note: Must be called after Init(). Only works for non-init variables
-   * (init variables should be passed to Init()).
-   */
-  bool SetVariable(const char *name, const char *value);
-  bool SetDebugVariable(const char *name, const char *value);
-
-  /**
-   * Returns true if the parameter was found among Tesseract parameters.
-   * Fills in value with the value of the parameter.
-   */
-  bool GetIntVariable(const char *name, int *value) const;
-  bool GetBoolVariable(const char *name, bool *value) const;
-  bool GetDoubleVariable(const char *name, double *value) const;
-
-  /**
-   * Returns the pointer to the string that represents the value of the
-   * parameter if it was found among Tesseract parameters.
-   */
-  const char *GetStringVariable(const char *name) const;
-
-#ifndef DISABLED_LEGACY_ENGINE
-
-  /**
-   * Print Tesseract fonts table to the given file.
-   */
-  void PrintFontsTable(FILE *fp) const;
-
-#endif
-
-  /**
-   * Print Tesseract parameters to the given file.
-   */
-  void PrintVariables(FILE *fp) const;
-
-  /**
-   * Get value of named variable as a string, if it exists.
-   */
-  bool GetVariableAsString(const char *name, std::string *val) const;
-
-  /**
-   * Instances are now mostly thread-safe and totally independent,
-   * but some global parameters remain. Basically it is safe to use multiple
-   * TessBaseAPIs in different threads in parallel, UNLESS:
-   * you use SetVariable on some of the Params in classify and textord.
-   * If you do, then the effect will be to change it for all your instances.
-   *
-   * Start tesseract. Returns zero on success and -1 on failure.
-   * NOTE that the only members that may be called before Init are those
-   * listed above here in the class definition.
-   *
-   * The datapath must be the name of the tessdata directory.
-   * The language is (usually) an ISO 639-3 string or nullptr will default to
-   * eng. It is entirely safe (and eventually will be efficient too) to call
-   * Init multiple times on the same instance to change language, or just
-   * to reset the classifier.
-   * The language may be a string of the form [~]<lang>[+[~]<lang>]* indicating
-   * that multiple languages are to be loaded. Eg hin+eng will load Hindi and
-   * English. Languages may specify internally that they want to be loaded
-   * with one or more other languages, so the ~ sign is available to override
-   * that. Eg if hin were set to load eng by default, then hin+~eng would force
-   * loading only hin. The number of loaded languages is limited only by
-   * memory, with the caveat that loading additional languages will impact
-   * both speed and accuracy, as there is more work to do to decide on the
-   * applicable language, and there is more chance of hallucinating incorrect
-   * words.
-   * WARNING: On changing languages, all Tesseract parameters are reset
-   * back to their default values. (Which may vary between languages.)
-   * If you have a rare need to set a Variable that controls
-   * initialization for a second call to Init you should explicitly
-   * call End() and then use SetVariable before Init. This is only a very
-   * rare use case, since there are very few uses that require any parameters
-   * to be set before Init.
-   *
-   * If set_only_non_debug_params is true, only params that do not contain
-   * "debug" in the name will be set.
-   */
-  int Init(const char *datapath, const char *language, OcrEngineMode mode,
-           char **configs, int configs_size,
-           const std::vector<std::string> *vars_vec,
-           const std::vector<std::string> *vars_values,
-           bool set_only_non_debug_params);
-  int Init(const char *datapath, const char *language, OcrEngineMode oem) {
-    return Init(datapath, language, oem, nullptr, 0, nullptr, nullptr, false);
-  }
-  int Init(const char *datapath, const char *language) {
-    return Init(datapath, language, OEM_DEFAULT, nullptr, 0, nullptr, nullptr,
-                false);
-  }
-  // In-memory version reads the traineddata file directly from the given
-  // data[data_size] array, and/or reads data via a FileReader.
-  int Init(const char *data, int data_size, const char *language,
-           OcrEngineMode mode, char **configs, int configs_size,
-           const std::vector<std::string> *vars_vec,
-           const std::vector<std::string> *vars_values,
-           bool set_only_non_debug_params, FileReader reader);
-
-  /**
-   * Returns the languages string used in the last valid initialization.
-   * If the last initialization specified "deu+hin" then that will be
-   * returned. If hin loaded eng automatically as well, then that will
-   * not be included in this list. To find the languages actually
-   * loaded use GetLoadedLanguagesAsVector.
-   * The returned string should NOT be deleted.
-   */
-  const char *GetInitLanguagesAsString() const;
-
-  /**
-   * Returns the loaded languages in the vector of std::string.
-   * Includes all languages loaded by the last Init, including those loaded
-   * as dependencies of other loaded languages.
-   */
-  void GetLoadedLanguagesAsVector(std::vector<std::string> *langs) const;
-
-  /**
-   * Returns the available languages in the sorted vector of std::string.
-   */
-  void GetAvailableLanguagesAsVector(std::vector<std::string> *langs) const;
-
-  /**
-   * Init only for page layout analysis. Use only for calls to SetImage and
-   * AnalysePage. Calls that attempt recognition will generate an error.
-   */
-  void InitForAnalysePage();
-
-  /**
-   * Read a "config" file containing a set of param, value pairs.
-   * Searches the standard places: tessdata/configs, tessdata/tessconfigs
-   * and also accepts a relative or absolute path name.
-   * Note: only non-init params will be set (init params are set by Init()).
-   */
-  void ReadConfigFile(const char *filename);
-  /** Same as above, but only set debug params from the given config file. */
-  void ReadDebugConfigFile(const char *filename);
-
-  /**
-   * Set the current page segmentation mode. Defaults to PSM_SINGLE_BLOCK.
-   * The mode is stored as an IntParam so it can also be modified by
-   * ReadConfigFile or SetVariable("tessedit_pageseg_mode", mode as string).
-   */
-  void SetPageSegMode(PageSegMode mode);
-
-  /** Return the current page segmentation mode. */
-  PageSegMode GetPageSegMode() const;
-
-  /**
-   * Recognize a rectangle from an image and return the result as a string.
-   * May be called many times for a single Init.
-   * Currently has no error checking.
-   * Greyscale of 8 and color of 24 or 32 bits per pixel may be given.
-   * Palette color images will not work properly and must be converted to
-   * 24 bit.
-   * Binary images of 1 bit per pixel may also be given but they must be
-   * byte packed with the MSB of the first byte being the first pixel, and a
-   * 1 represents WHITE. For binary images set bytes_per_pixel=0.
-   * The recognized text is returned as a char* which is coded
-   * as UTF8 and must be freed with the delete [] operator.
-   *
-   * Note that TesseractRect is the simplified convenience interface.
-   * For advanced uses, use SetImage, (optionally) SetRectangle, Recognize,
-   * and one or more of the Get*Text functions below.
-   */
-  char *TesseractRect(const unsigned char *imagedata, int bytes_per_pixel,
-                      int bytes_per_line, int left, int top, int width,
-                      int height);
-
-  /**
-   * Call between pages or documents etc to free up memory and forget
-   * adaptive data.
-   */
-  void ClearAdaptiveClassifier();
-
-  /**
-   * @defgroup AdvancedAPI Advanced API
-   * The following methods break TesseractRect into pieces, so you can
-   * get hold of the thresholded image, get the text in different formats,
-   * get bounding boxes, confidences etc.
-   */
-  /* @{ */
-
-  /**
-   * Provide an image for Tesseract to recognize. Format is as
-   * TesseractRect above. Copies the image buffer and converts to Pix.
-   * SetImage clears all recognition results, and sets the rectangle to the
-   * full image, so it may be followed immediately by a GetUTF8Text, and it
-   * will automatically perform recognition.
-   */
-  void SetImage(const unsigned char *imagedata, int width, int height,
-                int bytes_per_pixel, int bytes_per_line);
-
-  /**
-   * Provide an image for Tesseract to recognize. As with SetImage above,
-   * Tesseract takes its own copy of the image, so it need not persist until
-   * after Recognize.
-   * Pix vs raw, which to use?
-   * Use Pix where possible. Tesseract uses Pix as its internal representation
-   * and it is therefore more efficient to provide a Pix directly.
-   */
-  void SetImage(Pix *pix);
-
-  /**
-   * Set the resolution of the source image in pixels per inch so font size
-   * information can be calculated in results.  Call this after SetImage().
-   */
-  void SetSourceResolution(int ppi);
-
-  /**
-   * Restrict recognition to a sub-rectangle of the image. Call after SetImage.
-   * Each SetRectangle clears the recogntion results so multiple rectangles
-   * can be recognized with the same image.
-   */
-  void SetRectangle(int left, int top, int width, int height);
-
-  /**
-   * Get a copy of the internal thresholded image from Tesseract.
-   * Caller takes ownership of the Pix and must pixDestroy it.
-   * May be called any time after SetImage, or after TesseractRect.
-   */
-  Pix *GetThresholdedImage();
-
-  /**
-   * Get the result of page layout analysis as a leptonica-style
-   * Boxa, Pixa pair, in reading order.
-   * Can be called before or after Recognize.
-   */
-  Boxa *GetRegions(Pixa **pixa);
-
-  /**
-   * Get the textlines as a leptonica-style
-   * Boxa, Pixa pair, in reading order.
-   * Can be called before or after Recognize.
-   * If raw_image is true, then extract from the original image instead of the
-   * thresholded image and pad by raw_padding pixels.
-   * If blockids is not nullptr, the block-id of each line is also returned as
-   * an array of one element per line. delete [] after use. If paraids is not
-   * nullptr, the paragraph-id of each line within its block is also returned as
-   * an array of one element per line. delete [] after use.
-   */
-  Boxa *GetTextlines(bool raw_image, int raw_padding, Pixa **pixa,
-                     int **blockids, int **paraids);
-  /*
-   * Helper for the most common usage: thresholded image, no padding, paragraph ids not returned.
-   */
-  Boxa *GetTextlines(Pixa **pixa, int **blockids) {
-    return GetTextlines(false, 0, pixa, blockids, nullptr);
-  }
-
-  /**
-   * Get textlines and strips of image regions as a leptonica-style Boxa, Pixa
-   * pair, in reading order. Enables downstream handling of non-rectangular
-   * regions.
-   * Can be called before or after Recognize.
-   * If blockids is not nullptr, the block-id of each line is also returned as
-   * an array of one element per line. delete [] after use.
-   */
-  Boxa *GetStrips(Pixa **pixa, int **blockids);
-
-  /**
-   * Get the words as a leptonica-style
-   * Boxa, Pixa pair, in reading order.
-   * Can be called before or after Recognize.
-   */
-  Boxa *GetWords(Pixa **pixa);
-
-  /**
-   * Gets the individual connected (text) components (created
-   * after pages segmentation step, but before recognition)
-   * as a leptonica-style Boxa, Pixa pair, in reading order.
-   * Can be called before or after Recognize.
-   * Note: the caller is responsible for calling boxaDestroy()
-   * on the returned Boxa array and pixaDestroy() on cc array.
-   */
-  Boxa *GetConnectedComponents(Pixa **cc);
-
-  /**
-   * Get the given level kind of components (block, textline, word etc.) as a
-   * leptonica-style Boxa, Pixa pair, in reading order.
-   * Can be called before or after Recognize.
-   * If blockids is not nullptr, the block-id of each component is also returned
-   * as an array of one element per component. delete [] after use.
-   * If paraids is not nullptr, the paragraph-id of each component within its
-   * block is also returned as an array of one element per component. delete []
-   * after use. If raw_image is true, then portions of the original image are
-   * extracted instead of the thresholded image and padded with raw_padding. If
-   * text_only is true, then only text components are returned.
-   */
-  Boxa *GetComponentImages(PageIteratorLevel level, bool text_only,
-                           bool raw_image, int raw_padding, Pixa **pixa,
-                           int **blockids, int **paraids);
-  // Convenience overload (most common usage): thresholded images, no padding, paragraph ids not returned.
-  Boxa *GetComponentImages(const PageIteratorLevel level, const bool text_only,
-                           Pixa **pixa, int **blockids) {
-    return GetComponentImages(level, text_only, false, 0, pixa, blockids,
-                              nullptr);
-  }
-
-  /**
-   * Returns the scale factor of the thresholded image that would be returned by
-   * GetThresholdedImage() and the various GetX() methods that call
-   * GetComponentImages().
-   * Returns 0 if no thresholder has been set.
-   */
-  int GetThresholdedImageScaleFactor() const;
-
-  /**
-   * Runs page layout analysis in the mode set by SetPageSegMode.
-   * May optionally be called prior to Recognize to get access to just
-   * the page layout results. Returns an iterator to the results.
-   * If merge_similar_words is true, words are combined where suitable for use
-   * with a line recognizer. Use if you want to use AnalyseLayout to find the
-   * textlines, and then want to process textline fragments with an external
-   * line recognizer.
-   * Returns nullptr on error or an empty page.
-   * The returned iterator must be deleted after use.
-   * WARNING! This class points to data held within the TessBaseAPI class, and
-   * therefore can only be used while the TessBaseAPI class still exists and
-   * has not been subjected to a call of Init, SetImage, Recognize, Clear, End,
-   * DetectOS, or anything else that changes the internal PAGE_RES.
-   */
-  PageIterator *AnalyseLayout();
-  PageIterator *AnalyseLayout(bool merge_similar_words);
-
-  /**
-   * Recognize the image from SetAndThresholdImage, generating Tesseract
-   * internal structures. Returns 0 on success.
-   * Optional. The Get*Text functions below will call Recognize if needed.
-   * After Recognize, the output is kept internally until the next SetImage.
-   */
-  int Recognize(ETEXT_DESC *monitor);
-
-  /**
-   * Methods to retrieve information after SetAndThresholdImage(),
-   * Recognize() or TesseractRect(). (Recognize is called implicitly if needed.)
-   */
-
-  /**
-   * Turns images into symbolic text.
-   *
-   * filename can point to a single image, a multi-page TIFF,
-   * or a plain text list of image filenames.
-   *
-   * retry_config is useful for debugging. If not nullptr, you can fall
-   * back to an alternate configuration if a page fails for some
-   * reason.
-   *
-   * timeout_millisec terminates processing if any single page
-   * takes too long. Set to 0 for unlimited time.
-   *
-   * renderer is responsible for creating the output. For example,
-   * use the TessTextRenderer if you want plaintext output, or
-   * the TessPDFRenderer to produce searchable PDF.
-   *
-   * If tessedit_page_number is non-negative, will only process that
-   * single page. Works for multi-page tiff file, or filelist.
-   *
-   * Returns true if successful, false on error.
-   */
-  bool ProcessPages(const char *filename, const char *retry_config,
-                    int timeout_millisec, TessResultRenderer *renderer);
-  // Does the real work of ProcessPages.
-  bool ProcessPagesInternal(const char *filename, const char *retry_config,
-                            int timeout_millisec, TessResultRenderer *renderer);
-
-  /**
-   * Turn a single image into symbolic text.
-   *
-   * The pix is the image processed. filename and page_index are
-   * metadata used by side-effect processes, such as reading a box
-   * file or formatting as hOCR.
-   *
-   * See ProcessPages for descriptions of other parameters.
-   */
-  bool ProcessPage(Pix *pix, int page_index, const char *filename,
-                   const char *retry_config, int timeout_millisec,
-                   TessResultRenderer *renderer);
-
-  /**
-   * Get a reading-order iterator to the results of LayoutAnalysis and/or
-   * Recognize. The returned iterator must be deleted after use.
-   * WARNING! This class points to data held within the TessBaseAPI class, and
-   * therefore can only be used while the TessBaseAPI class still exists and
-   * has not been subjected to a call of Init, SetImage, Recognize, Clear, End,
-   * DetectOS, or anything else that changes the internal PAGE_RES.
-   */
-  ResultIterator *GetIterator();
-
-  /**
-   * Get a mutable iterator to the results of LayoutAnalysis and/or Recognize.
-   * The returned iterator must be deleted after use.
-   * WARNING! This class points to data held within the TessBaseAPI class, and
-   * therefore can only be used while the TessBaseAPI class still exists and
-   * has not been subjected to a call of Init, SetImage, Recognize, Clear, End,
-   * DetectOS, or anything else that changes the internal PAGE_RES.
-   */
-  MutableIterator *GetMutableIterator();
-
-  /**
-   * The recognized text is returned as a char* which is coded
-   * as UTF8 and must be freed with the delete [] operator.
-   */
-  char *GetUTF8Text();
-
-  /**
-   * Make a HTML-formatted string with hOCR markup from the internal
-   * data structures.
-   * page_number is 0-based but will appear in the output as 1-based.
-   * monitor can be used to
-   *  cancel the recognition
-   *  receive progress callbacks
-   * Returned string must be freed with the delete [] operator.
-   */
-  char *GetHOCRText(ETEXT_DESC *monitor, int page_number);
-
-  /**
-   * Make a HTML-formatted string with hOCR markup from the internal
-   * data structures.
-   * page_number is 0-based but will appear in the output as 1-based.
-   * Returned string must be freed with the delete [] operator.
-   */
-  char *GetHOCRText(int page_number);
-
-  /**
-   * Make an XML-formatted string with Alto markup from the internal
-   * data structures.
-   */
-  char *GetAltoText(ETEXT_DESC *monitor, int page_number);
-
-  /**
-   * Make an XML-formatted string with Alto markup from the internal
-   * data structures.
-   */
-  char *GetAltoText(int page_number);
-
-  /**
-   * Make a TSV-formatted string from the internal data structures.
-   * page_number is 0-based but will appear in the output as 1-based.
-   * Returned string must be freed with the delete [] operator.
-   */
-  char *GetTSVText(int page_number);
-
-  /**
-   * Make a box file for LSTM training from the internal data structures.
-   * Constructs coordinates in the original image - not just the rectangle.
-   * page_number is a 0-based page index that will appear in the box file.
-   * Returned string must be freed with the delete [] operator.
-   */
-  char *GetLSTMBoxText(int page_number);
-
-  /**
-   * The recognized text is returned as a char* which is coded in the same
-   * format as a box file used in training.
-   * Constructs coordinates in the original image - not just the rectangle.
-   * page_number is a 0-based page index that will appear in the box file.
-   * Returned string must be freed with the delete [] operator.
-   */
-  char *GetBoxText(int page_number);
-
-  /**
-   * The recognized text is returned as a char* which is coded in the same
-   * format as a WordStr box file used in training.
-   * page_number is a 0-based page index that will appear in the box file.
-   * Returned string must be freed with the delete [] operator.
-   */
-  char *GetWordStrBoxText(int page_number);
-
-  /**
-   * The recognized text is returned as a char* which is coded
-   * as UNLV format Latin-1 with specific reject and suspect codes.
-   * Returned string must be freed with the delete [] operator.
-   */
-  char *GetUNLVText();
-
-  /**
-   * Detect the orientation of the input image and apparent script (alphabet).
-   * orient_deg is the detected clockwise rotation of the input image in degrees
-   * (0, 90, 180, 270)
-   * orient_conf is the confidence (15.0 is reasonably confident)
-   * script_name is an ASCII string, the name of the script, e.g. "Latin"
-   * script_conf is confidence level in the script
-   * Returns true on success and writes values to each parameter as an output
-   */
-  bool DetectOrientationScript(int *orient_deg, float *orient_conf,
-                               const char **script_name, float *script_conf);
-
-  /**
-   * The recognized text is returned as a char* which is coded
-   * as UTF8 and must be freed with the delete [] operator.
-   * page_number is a 0-based page index that will appear in the osd file.
-   */
-  char *GetOsdText(int page_number);
-
-  /** Returns the (average) confidence value between 0 and 100. */
-  int MeanTextConf();
-  /**
-   * Returns all word confidences (between 0 and 100) in an array, terminated
-   * by -1.  The calling function must delete [] after use.
-   * The number of confidences should correspond to the number of space-
-   * delimited words in GetUTF8Text.
-   */
-  int *AllWordConfidences();
-
-#ifndef DISABLED_LEGACY_ENGINE
-  /**
-   * Applies the given word to the adaptive classifier if possible.
-   * The word must be SPACE-DELIMITED UTF-8 - l i k e t h i s , so it can
-   * tell the boundaries of the graphemes.
-   * Assumes that SetImage/SetRectangle have been used to set the image
-   * to the given word. The mode arg should be PSM_SINGLE_WORD or
-   * PSM_CIRCLE_WORD, as that will be used to control layout analysis.
-   * The currently set PageSegMode is preserved.
-   * Returns false if adaption was not possible for some reason.
-   */
-  bool AdaptToWordStr(PageSegMode mode, const char *wordstr);
-#endif //  ndef DISABLED_LEGACY_ENGINE
-
-  /**
-   * Free up recognition results and any stored image data, without actually
-   * freeing any recognition data that would be time-consuming to reload.
-   * Afterwards, you must call SetImage or TesseractRect before doing
-   * any Recognize or Get* operation.
-   */
-  void Clear();
-
-  /**
-   * Close down tesseract and free up all memory. End() is equivalent to
-   * destructing and reconstructing your TessBaseAPI.
-   * Once End() has been used, none of the other API functions may be used
-   * other than Init and anything declared above it in the class definition.
-   */
-  void End();
-
-  /**
-   * Clear any library-level memory caches.
-   * There are a variety of expensive-to-load constant data structures (mostly
-   * language dictionaries) that are cached globally -- surviving the Init()
-   * and End() of individual TessBaseAPI's.  This function allows the clearing
-   * of these caches.
-   **/
-  static void ClearPersistentCache();
-
-  /**
-   * Check whether a word is valid according to Tesseract's language model
-   * @return 0 if the word is invalid, non-zero if valid.
-   * @warning temporary! This function will be removed from here and placed
-   * in a separate API at some future time.
-   */
-  int IsValidWord(const char *word) const;
-  // Returns true if utf8_character is defined in the UniCharset.
-  bool IsValidCharacter(const char *utf8_character) const;
-
-  bool GetTextDirection(int *out_offset, float *out_slope);
-
-  /** Sets Dict::letter_is_okay_ function to point to the given function. */
-  void SetDictFunc(DictFunc f);
-
-  /** Sets Dict::probability_in_context_ function to point to the given
-   * function.
-   */
-  void SetProbabilityInContextFunc(ProbabilityInContextFunc f);
-
-  /**
-   * Estimates the Orientation And Script of the image.
-   * @return true if the image was processed successfully.
-   */
-  bool DetectOS(OSResults *);
-
-  /**
-   * Return text orientation of each block as determined by an earlier run
-   * of layout analysis.
-   */
-  void GetBlockTextOrientations(int **block_orientation,
-                                bool **vertical_writing);
-
-  /** This method returns the string form of the specified unichar. */
-  const char *GetUnichar(int unichar_id) const;
-
-  /** Return the pointer to the i-th dawg loaded into tesseract_ object. */
-  const Dawg *GetDawg(int i) const;
-
-  /** Return the number of dawgs loaded into tesseract_ object. */
-  int NumDawgs() const;
-
-  Tesseract *tesseract() const { // Accessor: returns the tesseract_ member pointer unchanged.
-    return tesseract_;
-  }
-
-  OcrEngineMode oem() const { // Accessor: returns last_oem_requested_ (the last OCR engine mode requested).
-    return last_oem_requested_;
-  }
-
-  void set_min_orientation_margin(double margin);
-  /* @} */
-
-protected:
-  /** Common code for setting the image. Returns true if Init has been called.
-   */
-  bool InternalSetImage();
-
-  /**
-   * Run the thresholder to make the thresholded image. If pix is not nullptr,
-   * the source is thresholded to pix instead of the internal IMAGE.
-   */
-  virtual bool Threshold(Pix **pix);
-
-  /**
-   * Find lines from the image making the BLOCK_LIST.
-   * @return 0 on success.
-   */
-  int FindLines();
-
-  /** Delete the pageres and block list ready for a new page. */
-  void ClearResults();
-
-  /**
-   * Return an LTR Result Iterator -- used only for training, as we really want
-   * to ignore all BiDi smarts at that point.
-   * delete once you're done with it.
-   */
-  LTRResultIterator *GetLTRIterator();
-
-  /**
-   * Return the length of the output text string, as UTF8, assuming
-   * one newline per line and one per block, with a terminator,
-   * and assuming a single character reject marker for each rejected character.
-   * Also return the number of recognized blobs in blob_count.
-   */
-  int TextLength(int *blob_count) const;
-
-  //// paragraphs.cpp ////////////////////////////////////////////////////
-  void DetectParagraphs(bool after_text_recognition);
-
-  const PAGE_RES *GetPageRes() const { // Const accessor for page_res_ (the page-level data).
-    return page_res_;
-  }
-
-protected:
-  Tesseract *tesseract_;          ///< The underlying data object.
-  Tesseract *osd_tesseract_;      ///< For orientation & script detection.
-  EquationDetect *equ_detect_;    ///< The equation detector.
-  FileReader reader_;             ///< Reads files from any filesystem.
-  ImageThresholder *thresholder_; ///< Image thresholding module.
-  std::vector<ParagraphModel *> *paragraph_models_;
-  BLOCK_LIST *block_list_;           ///< The page layout.
-  PAGE_RES *page_res_;               ///< The page-level data.
-  std::string input_file_;           ///< Name used by training code.
-  std::string output_file_;          ///< Name used by debug code.
-  std::string datapath_;             ///< Current location of tessdata.
-  std::string language_;             ///< Last initialized language.
-  OcrEngineMode last_oem_requested_; ///< Last ocr language mode requested.
-  bool recognition_done_;            ///< page_res_ contains recognition data.
-
-  /**
-   * @defgroup ThresholderParams Thresholder Parameters
-   * Parameters saved from the Thresholder. Needed to rebuild coordinates.
-   */
-  /* @{ */
-  int rect_left_;
-  int rect_top_;
-  int rect_width_;
-  int rect_height_;
-  int image_width_;
-  int image_height_;
-  /* @} */
-
-private:
-  // A list of image filenames gets special consideration
-  bool ProcessPagesFileList(FILE *fp, std::string *buf,
-                            const char *retry_config, int timeout_millisec,
-                            TessResultRenderer *renderer,
-                            int tessedit_page_number);
-  // TIFF supports multipage so gets special consideration.
-  bool ProcessPagesMultipageTiff(const unsigned char *data, size_t size,
-                                 const char *filename, const char *retry_config,
-                                 int timeout_millisec,
-                                 TessResultRenderer *renderer,
-                                 int tessedit_page_number);
-}; // class TessBaseAPI.
-
-/** Escape a char string - replace &<>"' with HTML codes. */
-std::string HOcrEscape(const char *text);
-
-} // namespace tesseract
-
-#endif // TESSERACT_API_BASEAPI_H_
--- a/third_party/ocr/tesseract-ocr/uos/aarch64/include/tesseract/capi.h
+++ b/third_party/ocr/tesseract-ocr/uos/aarch64/include/tesseract/capi.h
@ -1,484 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// File:        capi.h
-// Description: C-API TessBaseAPI
-//
-// (C) Copyright 2012, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef API_CAPI_H_
-#define API_CAPI_H_
-
-#include "export.h"
-
-#ifdef __cplusplus
-#  include <tesseract/baseapi.h>
-#  include <tesseract/ocrclass.h>
-#  include <tesseract/pageiterator.h>
-#  include <tesseract/renderer.h>
-#  include <tesseract/resultiterator.h>
-#endif
-
-#include <stdbool.h>
-#include <stdio.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#ifndef BOOL
-#  define BOOL int
-#  define TRUE 1
-#  define FALSE 0
-#endif
-
-#ifdef __cplusplus
-typedef tesseract::TessResultRenderer TessResultRenderer;
-typedef tesseract::TessBaseAPI TessBaseAPI;
-typedef tesseract::PageIterator TessPageIterator;
-typedef tesseract::ResultIterator TessResultIterator;
-typedef tesseract::MutableIterator TessMutableIterator;
-typedef tesseract::ChoiceIterator TessChoiceIterator;
-typedef tesseract::OcrEngineMode TessOcrEngineMode;
-typedef tesseract::PageSegMode TessPageSegMode;
-typedef tesseract::PageIteratorLevel TessPageIteratorLevel;
-typedef tesseract::Orientation TessOrientation;
-typedef tesseract::ParagraphJustification TessParagraphJustification;
-typedef tesseract::WritingDirection TessWritingDirection;
-typedef tesseract::TextlineOrder TessTextlineOrder;
-typedef tesseract::PolyBlockType TessPolyBlockType;
-typedef tesseract::ETEXT_DESC ETEXT_DESC;
-#else
-typedef struct TessResultRenderer TessResultRenderer;
-typedef struct TessBaseAPI TessBaseAPI;
-typedef struct TessPageIterator TessPageIterator;
-typedef struct TessResultIterator TessResultIterator;
-typedef struct TessMutableIterator TessMutableIterator;
-typedef struct TessChoiceIterator TessChoiceIterator;
-typedef enum TessOcrEngineMode { // C-only mirror of tesseract::OcrEngineMode (aliased directly in C++ builds); values presumably must match - verify.
-  OEM_TESSERACT_ONLY,
-  OEM_LSTM_ONLY,
-  OEM_TESSERACT_LSTM_COMBINED,
-  OEM_DEFAULT
-} TessOcrEngineMode;
-typedef enum TessPageSegMode { // C-only mirror of tesseract::PageSegMode (aliased directly in C++ builds); values presumably must match - verify.
-  PSM_OSD_ONLY,
-  PSM_AUTO_OSD,
-  PSM_AUTO_ONLY,
-  PSM_AUTO,
-  PSM_SINGLE_COLUMN,
-  PSM_SINGLE_BLOCK_VERT_TEXT,
-  PSM_SINGLE_BLOCK,
-  PSM_SINGLE_LINE,
-  PSM_SINGLE_WORD,
-  PSM_CIRCLE_WORD,
-  PSM_SINGLE_CHAR,
-  PSM_SPARSE_TEXT,
-  PSM_SPARSE_TEXT_OSD,
-  PSM_RAW_LINE,
-  PSM_COUNT // == 14: number of enumerators above (implicit values start at 0).
-} TessPageSegMode;
-typedef enum TessPageIteratorLevel { // C-only mirror of tesseract::PageIteratorLevel (aliased directly in C++ builds); values presumably must match - verify.
-  RIL_BLOCK,
-  RIL_PARA,
-  RIL_TEXTLINE,
-  RIL_WORD,
-  RIL_SYMBOL
-} TessPageIteratorLevel;
-typedef enum TessPolyBlockType { // C-only mirror of tesseract::PolyBlockType (aliased directly in C++ builds); values presumably must match - verify.
-  PT_UNKNOWN,
-  PT_FLOWING_TEXT,
-  PT_HEADING_TEXT,
-  PT_PULLOUT_TEXT,
-  PT_EQUATION,
-  PT_INLINE_EQUATION,
-  PT_TABLE,
-  PT_VERTICAL_TEXT,
-  PT_CAPTION_TEXT,
-  PT_FLOWING_IMAGE,
-  PT_HEADING_IMAGE,
-  PT_PULLOUT_IMAGE,
-  PT_HORZ_LINE,
-  PT_VERT_LINE,
-  PT_NOISE,
-  PT_COUNT // == 15: number of enumerators above (implicit values start at 0).
-} TessPolyBlockType;
-typedef enum TessOrientation { // C-only mirror of tesseract::Orientation (aliased directly in C++ builds); values presumably must match - verify.
-  ORIENTATION_PAGE_UP,
-  ORIENTATION_PAGE_RIGHT,
-  ORIENTATION_PAGE_DOWN,
-  ORIENTATION_PAGE_LEFT
-} TessOrientation;
-typedef enum TessParagraphJustification { // C-only mirror of tesseract::ParagraphJustification (aliased directly in C++ builds); values presumably must match - verify.
-  JUSTIFICATION_UNKNOWN,
-  JUSTIFICATION_LEFT,
-  JUSTIFICATION_CENTER,
-  JUSTIFICATION_RIGHT
-} TessParagraphJustification;
-typedef enum TessWritingDirection { // C-only mirror of tesseract::WritingDirection (aliased directly in C++ builds); values presumably must match - verify.
-  WRITING_DIRECTION_LEFT_TO_RIGHT,
-  WRITING_DIRECTION_RIGHT_TO_LEFT,
-  WRITING_DIRECTION_TOP_TO_BOTTOM
-} TessWritingDirection;
-typedef enum TessTextlineOrder { // C-only mirror of tesseract::TextlineOrder (aliased directly in C++ builds); values presumably must match - verify.
-  TEXTLINE_ORDER_LEFT_TO_RIGHT,
-  TEXTLINE_ORDER_RIGHT_TO_LEFT,
-  TEXTLINE_ORDER_TOP_TO_BOTTOM
-} TessTextlineOrder;
-typedef struct ETEXT_DESC ETEXT_DESC;
-#endif
-
-typedef bool (*TessCancelFunc)(void *cancel_this, int words);
-typedef bool (*TessProgressFunc)(ETEXT_DESC *ths, int left, int right, int top,
-                                 int bottom);
-
-struct Pix;
-struct Boxa;
-struct Pixa;
-
-/* General free functions */
-
-TESS_API const char *TessVersion();
-TESS_API void TessDeleteText(const char *text);
-TESS_API void TessDeleteTextArray(char **arr);
-TESS_API void TessDeleteIntArray(const int *arr);
-
-/* Renderer API */
-TESS_API TessResultRenderer *TessTextRendererCreate(const char *outputbase);
-TESS_API TessResultRenderer *TessHOcrRendererCreate(const char *outputbase);
-TESS_API TessResultRenderer *TessHOcrRendererCreate2(const char *outputbase,
-                                                     BOOL font_info);
-TESS_API TessResultRenderer *TessAltoRendererCreate(const char *outputbase);
-TESS_API TessResultRenderer *TessTsvRendererCreate(const char *outputbase);
-TESS_API TessResultRenderer *TessPDFRendererCreate(const char *outputbase,
-                                                   const char *datadir,
-                                                   BOOL textonly);
-TESS_API TessResultRenderer *TessUnlvRendererCreate(const char *outputbase);
-TESS_API TessResultRenderer *TessBoxTextRendererCreate(const char *outputbase);
-TESS_API TessResultRenderer *TessLSTMBoxRendererCreate(const char *outputbase);
-TESS_API TessResultRenderer *TessWordStrBoxRendererCreate(
-    const char *outputbase);
-
-TESS_API void TessDeleteResultRenderer(TessResultRenderer *renderer);
-TESS_API void TessResultRendererInsert(TessResultRenderer *renderer,
-                                       TessResultRenderer *next);
-TESS_API TessResultRenderer *TessResultRendererNext(
-    TessResultRenderer *renderer);
-TESS_API BOOL TessResultRendererBeginDocument(TessResultRenderer *renderer,
-                                              const char *title);
-TESS_API BOOL TessResultRendererAddImage(TessResultRenderer *renderer,
-                                         TessBaseAPI *api);
-TESS_API BOOL TessResultRendererEndDocument(TessResultRenderer *renderer);
-
-TESS_API const char *TessResultRendererExtention(TessResultRenderer *renderer);
-TESS_API const char *TessResultRendererTitle(TessResultRenderer *renderer);
-TESS_API int TessResultRendererImageNum(TessResultRenderer *renderer);
-
-/* Base API */
-
-TESS_API TessBaseAPI *TessBaseAPICreate();
-TESS_API void TessBaseAPIDelete(TessBaseAPI *handle);
-
-TESS_API size_t TessBaseAPIGetOpenCLDevice(TessBaseAPI *handle, void **device);
-
-TESS_API void TessBaseAPISetInputName(TessBaseAPI *handle, const char *name);
-TESS_API const char *TessBaseAPIGetInputName(TessBaseAPI *handle);
-
-TESS_API void TessBaseAPISetInputImage(TessBaseAPI *handle, struct Pix *pix);
-TESS_API struct Pix *TessBaseAPIGetInputImage(TessBaseAPI *handle);
-
-TESS_API int TessBaseAPIGetSourceYResolution(TessBaseAPI *handle);
-TESS_API const char *TessBaseAPIGetDatapath(TessBaseAPI *handle);
-
-TESS_API void TessBaseAPISetOutputName(TessBaseAPI *handle, const char *name);
-
-TESS_API BOOL TessBaseAPISetVariable(TessBaseAPI *handle, const char *name,
-                                     const char *value);
-TESS_API BOOL TessBaseAPISetDebugVariable(TessBaseAPI *handle, const char *name,
-                                          const char *value);
-
-TESS_API BOOL TessBaseAPIGetIntVariable(const TessBaseAPI *handle,
-                                        const char *name, int *value);
-TESS_API BOOL TessBaseAPIGetBoolVariable(const TessBaseAPI *handle,
-                                         const char *name, BOOL *value);
-TESS_API BOOL TessBaseAPIGetDoubleVariable(const TessBaseAPI *handle,
-                                           const char *name, double *value);
-TESS_API const char *TessBaseAPIGetStringVariable(const TessBaseAPI *handle,
-                                                  const char *name);
-
-TESS_API void TessBaseAPIPrintVariables(const TessBaseAPI *handle, FILE *fp);
-TESS_API BOOL TessBaseAPIPrintVariablesToFile(const TessBaseAPI *handle,
-                                              const char *filename);
-
-TESS_API int TessBaseAPIInit1(TessBaseAPI *handle, const char *datapath,
-                              const char *language, TessOcrEngineMode oem,
-                              char **configs, int configs_size);
-TESS_API int TessBaseAPIInit2(TessBaseAPI *handle, const char *datapath,
-                              const char *language, TessOcrEngineMode oem);
-TESS_API int TessBaseAPIInit3(TessBaseAPI *handle, const char *datapath,
-                              const char *language);
-
-TESS_API int TessBaseAPIInit4(TessBaseAPI *handle, const char *datapath,
-                              const char *language, TessOcrEngineMode mode,
-                              char **configs, int configs_size, char **vars_vec,
-                              char **vars_values, size_t vars_vec_size,
-                              BOOL set_only_non_debug_params);
-
-TESS_API int TessBaseAPIInit5(TessBaseAPI *handle, const char *data, int data_size,
-                              const char *language, TessOcrEngineMode mode,
-                              char **configs, int configs_size, char **vars_vec,
-                              char **vars_values, size_t vars_vec_size,
-                              BOOL set_only_non_debug_params);
-
-TESS_API const char *TessBaseAPIGetInitLanguagesAsString(
-    const TessBaseAPI *handle);
-TESS_API char **TessBaseAPIGetLoadedLanguagesAsVector(
-    const TessBaseAPI *handle);
-TESS_API char **TessBaseAPIGetAvailableLanguagesAsVector(
-    const TessBaseAPI *handle);
-
-TESS_API void TessBaseAPIInitForAnalysePage(TessBaseAPI *handle);
-
-TESS_API void TessBaseAPIReadConfigFile(TessBaseAPI *handle,
-                                        const char *filename);
-TESS_API void TessBaseAPIReadDebugConfigFile(TessBaseAPI *handle,
-                                             const char *filename);
-
-TESS_API void TessBaseAPISetPageSegMode(TessBaseAPI *handle,
-                                        TessPageSegMode mode);
-TESS_API TessPageSegMode TessBaseAPIGetPageSegMode(const TessBaseAPI *handle);
-
-TESS_API char *TessBaseAPIRect(TessBaseAPI *handle,
-                               const unsigned char *imagedata,
-                               int bytes_per_pixel, int bytes_per_line,
-                               int left, int top, int width, int height);
-
-TESS_API void TessBaseAPIClearAdaptiveClassifier(TessBaseAPI *handle);
-
-TESS_API void TessBaseAPISetImage(TessBaseAPI *handle,
-                                  const unsigned char *imagedata, int width,
-                                  int height, int bytes_per_pixel,
-                                  int bytes_per_line);
-TESS_API void TessBaseAPISetImage2(TessBaseAPI *handle, struct Pix *pix);
-
-TESS_API void TessBaseAPISetSourceResolution(TessBaseAPI *handle, int ppi);
-
-TESS_API void TessBaseAPISetRectangle(TessBaseAPI *handle, int left, int top,
-                                      int width, int height);
-
-TESS_API struct Pix *TessBaseAPIGetThresholdedImage(TessBaseAPI *handle);
-TESS_API struct Boxa *TessBaseAPIGetRegions(TessBaseAPI *handle,
-                                            struct Pixa **pixa);
-TESS_API struct Boxa *TessBaseAPIGetTextlines(TessBaseAPI *handle,
-                                              struct Pixa **pixa,
-                                              int **blockids);
-TESS_API struct Boxa *TessBaseAPIGetTextlines1(TessBaseAPI *handle,
-                                               BOOL raw_image, int raw_padding,
-                                               struct Pixa **pixa,
-                                               int **blockids, int **paraids);
-TESS_API struct Boxa *TessBaseAPIGetStrips(TessBaseAPI *handle,
-                                           struct Pixa **pixa, int **blockids);
-TESS_API struct Boxa *TessBaseAPIGetWords(TessBaseAPI *handle,
-                                          struct Pixa **pixa);
-TESS_API struct Boxa *TessBaseAPIGetConnectedComponents(TessBaseAPI *handle,
-                                                        struct Pixa **cc);
-TESS_API struct Boxa *TessBaseAPIGetComponentImages(TessBaseAPI *handle,
-                                                    TessPageIteratorLevel level,
-                                                    BOOL text_only,
-                                                    struct Pixa **pixa,
-                                                    int **blockids);
-TESS_API struct Boxa *TessBaseAPIGetComponentImages1(
-    TessBaseAPI *handle, TessPageIteratorLevel level, BOOL text_only,
-    BOOL raw_image, int raw_padding, struct Pixa **pixa, int **blockids,
-    int **paraids);
-
-TESS_API int TessBaseAPIGetThresholdedImageScaleFactor(
-    const TessBaseAPI *handle);
-
-TESS_API TessPageIterator *TessBaseAPIAnalyseLayout(TessBaseAPI *handle);
-
-TESS_API int TessBaseAPIRecognize(TessBaseAPI *handle, ETEXT_DESC *monitor);
-
-TESS_API BOOL TessBaseAPIProcessPages(TessBaseAPI *handle, const char *filename,
-                                      const char *retry_config,
-                                      int timeout_millisec,
-                                      TessResultRenderer *renderer);
-TESS_API BOOL TessBaseAPIProcessPage(TessBaseAPI *handle, struct Pix *pix,
-                                     int page_index, const char *filename,
-                                     const char *retry_config,
-                                     int timeout_millisec,
-                                     TessResultRenderer *renderer);
-
-TESS_API TessResultIterator *TessBaseAPIGetIterator(TessBaseAPI *handle);
-TESS_API TessMutableIterator *TessBaseAPIGetMutableIterator(
-    TessBaseAPI *handle);
-
-TESS_API char *TessBaseAPIGetUTF8Text(TessBaseAPI *handle);
-TESS_API char *TessBaseAPIGetHOCRText(TessBaseAPI *handle, int page_number);
-
-TESS_API char *TessBaseAPIGetAltoText(TessBaseAPI *handle, int page_number);
-TESS_API char *TessBaseAPIGetTsvText(TessBaseAPI *handle, int page_number);
-
-TESS_API char *TessBaseAPIGetBoxText(TessBaseAPI *handle, int page_number);
-TESS_API char *TessBaseAPIGetLSTMBoxText(TessBaseAPI *handle, int page_number);
-TESS_API char *TessBaseAPIGetWordStrBoxText(TessBaseAPI *handle,
-                                            int page_number);
-
-TESS_API char *TessBaseAPIGetUNLVText(TessBaseAPI *handle);
-TESS_API int TessBaseAPIMeanTextConf(TessBaseAPI *handle);
-
-TESS_API int *TessBaseAPIAllWordConfidences(TessBaseAPI *handle);
-
-#ifndef DISABLED_LEGACY_ENGINE
-TESS_API BOOL TessBaseAPIAdaptToWordStr(TessBaseAPI *handle,
-                                        TessPageSegMode mode,
-                                        const char *wordstr);
-#endif // #ifndef DISABLED_LEGACY_ENGINE
-
-TESS_API void TessBaseAPIClear(TessBaseAPI *handle);
-TESS_API void TessBaseAPIEnd(TessBaseAPI *handle);
-
-TESS_API int TessBaseAPIIsValidWord(TessBaseAPI *handle, const char *word);
-TESS_API BOOL TessBaseAPIGetTextDirection(TessBaseAPI *handle, int *out_offset,
-                                          float *out_slope);
-
-TESS_API const char *TessBaseAPIGetUnichar(TessBaseAPI *handle, int unichar_id);
-
-TESS_API void TessBaseAPIClearPersistentCache(TessBaseAPI *handle);
-
-#ifndef DISABLED_LEGACY_ENGINE
-
-// Call TessDeleteText(*best_script_name) to free memory allocated by this
-// function
-TESS_API BOOL TessBaseAPIDetectOrientationScript(TessBaseAPI *handle,
-                                                 int *orient_deg,
-                                                 float *orient_conf,
-                                                 const char **script_name,
-                                                 float *script_conf);
-#endif // #ifndef DISABLED_LEGACY_ENGINE
-
-TESS_API void TessBaseAPISetMinOrientationMargin(TessBaseAPI *handle,
-                                                 double margin);
-
-TESS_API int TessBaseAPINumDawgs(const TessBaseAPI *handle);
-
-TESS_API TessOcrEngineMode TessBaseAPIOem(const TessBaseAPI *handle);
-
-TESS_API void TessBaseGetBlockTextOrientations(TessBaseAPI *handle,
-                                               int **block_orientation,
-                                               bool **vertical_writing);
-
-/* Page iterator */
-
-TESS_API void TessPageIteratorDelete(TessPageIterator *handle);
-
-TESS_API TessPageIterator *TessPageIteratorCopy(const TessPageIterator *handle);
-
-TESS_API void TessPageIteratorBegin(TessPageIterator *handle);
-
-TESS_API BOOL TessPageIteratorNext(TessPageIterator *handle,
-                                   TessPageIteratorLevel level);
-
-TESS_API BOOL TessPageIteratorIsAtBeginningOf(const TessPageIterator *handle,
-                                              TessPageIteratorLevel level);
-
-TESS_API BOOL TessPageIteratorIsAtFinalElement(const TessPageIterator *handle,
-                                               TessPageIteratorLevel level,
-                                               TessPageIteratorLevel element);
-
-TESS_API BOOL TessPageIteratorBoundingBox(const TessPageIterator *handle,
-                                          TessPageIteratorLevel level,
-                                          int *left, int *top, int *right,
-                                          int *bottom);
-
-TESS_API TessPolyBlockType
-TessPageIteratorBlockType(const TessPageIterator *handle);
-
-TESS_API struct Pix *TessPageIteratorGetBinaryImage(
-    const TessPageIterator *handle, TessPageIteratorLevel level);
-
-TESS_API struct Pix *TessPageIteratorGetImage(const TessPageIterator *handle,
-                                              TessPageIteratorLevel level,
-                                              int padding,
-                                              struct Pix *original_image,
-                                              int *left, int *top);
-
-TESS_API BOOL TessPageIteratorBaseline(const TessPageIterator *handle,
-                                       TessPageIteratorLevel level, int *x1,
-                                       int *y1, int *x2, int *y2);
-
-TESS_API void TessPageIteratorOrientation(
-    TessPageIterator *handle, TessOrientation *orientation,
-    TessWritingDirection *writing_direction, TessTextlineOrder *textline_order,
-    float *deskew_angle);
-
-TESS_API void TessPageIteratorParagraphInfo(
-    TessPageIterator *handle, TessParagraphJustification *justification,
-    BOOL *is_list_item, BOOL *is_crown, int *first_line_indent);
-
-/* Result iterator */
-
-TESS_API void TessResultIteratorDelete(TessResultIterator *handle);
-TESS_API TessResultIterator *TessResultIteratorCopy(
-    const TessResultIterator *handle);
-TESS_API TessPageIterator *TessResultIteratorGetPageIterator(
-    TessResultIterator *handle);
-TESS_API const TessPageIterator *TessResultIteratorGetPageIteratorConst(
-    const TessResultIterator *handle);
-TESS_API TessChoiceIterator *TessResultIteratorGetChoiceIterator(
-    const TessResultIterator *handle);
-
-TESS_API BOOL TessResultIteratorNext(TessResultIterator *handle,
-                                     TessPageIteratorLevel level);
-TESS_API char *TessResultIteratorGetUTF8Text(const TessResultIterator *handle,
-                                             TessPageIteratorLevel level);
-TESS_API float TessResultIteratorConfidence(const TessResultIterator *handle,
-                                            TessPageIteratorLevel level);
-TESS_API const char *TessResultIteratorWordRecognitionLanguage(
-    const TessResultIterator *handle);
-TESS_API const char *TessResultIteratorWordFontAttributes(
-    const TessResultIterator *handle, BOOL *is_bold, BOOL *is_italic,
-    BOOL *is_underlined, BOOL *is_monospace, BOOL *is_serif, BOOL *is_smallcaps,
-    int *pointsize, int *font_id);
-
-TESS_API BOOL
-TessResultIteratorWordIsFromDictionary(const TessResultIterator *handle);
-TESS_API BOOL TessResultIteratorWordIsNumeric(const TessResultIterator *handle);
-TESS_API BOOL
-TessResultIteratorSymbolIsSuperscript(const TessResultIterator *handle);
-TESS_API BOOL
-TessResultIteratorSymbolIsSubscript(const TessResultIterator *handle);
-TESS_API BOOL
-TessResultIteratorSymbolIsDropcap(const TessResultIterator *handle);
-
-TESS_API void TessChoiceIteratorDelete(TessChoiceIterator *handle);
-TESS_API BOOL TessChoiceIteratorNext(TessChoiceIterator *handle);
-TESS_API const char *TessChoiceIteratorGetUTF8Text(
-    const TessChoiceIterator *handle);
-TESS_API float TessChoiceIteratorConfidence(const TessChoiceIterator *handle);
-
-/* Progress monitor */
-
-TESS_API ETEXT_DESC *TessMonitorCreate();
-TESS_API void TessMonitorDelete(ETEXT_DESC *monitor);
-TESS_API void TessMonitorSetCancelFunc(ETEXT_DESC *monitor,
-                                       TessCancelFunc cancelFunc);
-TESS_API void TessMonitorSetCancelThis(ETEXT_DESC *monitor, void *cancelThis);
-TESS_API void *TessMonitorGetCancelThis(ETEXT_DESC *monitor);
-TESS_API void TessMonitorSetProgressFunc(ETEXT_DESC *monitor,
-                                         TessProgressFunc progressFunc);
-TESS_API int TessMonitorGetProgress(ETEXT_DESC *monitor);
-TESS_API void TessMonitorSetDeadlineMSecs(ETEXT_DESC *monitor, int deadline);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif // API_CAPI_H_
--- a/third_party/ocr/tesseract-ocr/uos/aarch64/include/tesseract/export.h
+++ b/third_party/ocr/tesseract-ocr/uos/aarch64/include/tesseract/export.h
@ -1,37 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// File:        export.h
-// Description: Place holder
-//
-// (C) Copyright 2006, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef TESSERACT_PLATFORM_H_
-#define TESSERACT_PLATFORM_H_
-
-#ifndef TESS_API
-#  if defined(_WIN32) || defined(__CYGWIN__)
-#    if defined(TESS_EXPORTS)
-#      define TESS_API __declspec(dllexport)
-#    elif defined(TESS_IMPORTS)
-#      define TESS_API __declspec(dllimport)
-#    else
-#      define TESS_API
-#    endif
-#  else
-#    if defined(TESS_EXPORTS) || defined(TESS_IMPORTS)
-#      define TESS_API __attribute__((visibility("default")))
-#    else
-#      define TESS_API
-#    endif
-#  endif
-#endif
-
-#endif // TESSERACT_PLATFORM_H_
--- a/third_party/ocr/tesseract-ocr/uos/aarch64/include/tesseract/ltrresultiterator.h
+++ b/third_party/ocr/tesseract-ocr/uos/aarch64/include/tesseract/ltrresultiterator.h
@ -1,235 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// File:        ltrresultiterator.h
-// Description: Iterator for tesseract results in strict left-to-right
-//              order that avoids using tesseract internal data structures.
-// Author:      Ray Smith
-//
-// (C) Copyright 2010, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H_
-#define TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H_
-
-#include "export.h"       // for TESS_API
-#include "pageiterator.h" // for PageIterator
-#include "publictypes.h"  // for PageIteratorLevel
-#include "unichar.h"      // for StrongScriptDirection
-
-namespace tesseract {
-
-class BLOB_CHOICE_IT;
-class PAGE_RES;
-class WERD_RES;
-
-class Tesseract;
-
-// Class to iterate over tesseract results, providing access to all levels
-// of the page hierarchy, without including any tesseract headers or having
-// to handle any tesseract structures.
-// WARNING! This class points to data held within the TessBaseAPI class, and
-// therefore can only be used while the TessBaseAPI class still exists and
-// has not been subjected to a call of Init, SetImage, Recognize, Clear, End
-// DetectOS, or anything else that changes the internal PAGE_RES.
-// See tesseract/publictypes.h for the definition of PageIteratorLevel.
-// See also base class PageIterator, which contains the bulk of the interface.
-// LTRResultIterator adds text-specific methods for access to OCR output.
-
-class TESS_API LTRResultIterator : public PageIterator {
-  friend class ChoiceIterator;
-
-public:
-  // page_res and tesseract come directly from the BaseAPI.
-  // The rectangle parameters are copied indirectly from the Thresholder,
-  // via the BaseAPI. They represent the coordinates of some rectangle in an
-  // original image (in top-left-origin coordinates) and therefore the top-left
-  // needs to be added to any output boxes in order to specify coordinates
-  // in the original image. See TessBaseAPI::SetRectangle.
-  // The scale and scaled_yres are in case the Thresholder scaled the image
-  // rectangle prior to thresholding. Any coordinates in tesseract's image
-  // must be divided by scale before adding (rect_left, rect_top).
-  // The scaled_yres indicates the effective resolution of the binary image
-  // that tesseract has been given by the Thresholder.
-  // After the constructor, Begin has already been called.
-  LTRResultIterator(PAGE_RES *page_res, Tesseract *tesseract, int scale,
-                    int scaled_yres, int rect_left, int rect_top,
-                    int rect_width, int rect_height);
-
-  ~LTRResultIterator() override;
-
-  // LTRResultIterators may be copied! This makes it possible to iterate over
-  // all the objects at a lower level, while maintaining an iterator to
-  // objects at a higher level. These constructors DO NOT CALL Begin, so
-  // iterations will continue from the location of src.
-  // TODO: For now the copy constructor and operator= only need the base class
-  // versions, but if new data members are added, don't forget to add them!
-
-  // ============= Moving around within the page ============.
-
-  // See PageIterator.
-
-  // ============= Accessing data ==============.
-
-  // Returns the null terminated UTF-8 encoded text string for the current
-  // object at the given level. Use delete [] to free after use.
-  char *GetUTF8Text(PageIteratorLevel level) const;
-
-  // Set the string inserted at the end of each text line. "\n" by default.
-  void SetLineSeparator(const char *new_line);
-
-  // Set the string inserted at the end of each paragraph. "\n" by default.
-  void SetParagraphSeparator(const char *new_para);
-
-  // Returns the mean confidence of the current object at the given level.
-  // The number should be interpreted as a percent probability. (0.0f-100.0f)
-  float Confidence(PageIteratorLevel level) const;
-
-  // ============= Functions that refer to words only ============.
-
-  // Returns the font attributes of the current word. If iterating at a higher
-  // level object than words, eg textlines, then this will return the
-  // attributes of the first word in that textline.
-  // The actual return value is a string representing a font name. It points
-  // to an internal table and SHOULD NOT BE DELETED. Lifespan is the same as
-  // the iterator itself, ie rendered invalid by various members of
-  // TessBaseAPI, including Init, SetImage, End or deleting the TessBaseAPI.
-  // Pointsize is returned in printers points (1/72 inch.)
-  const char *WordFontAttributes(bool *is_bold, bool *is_italic,
-                                 bool *is_underlined, bool *is_monospace,
-                                 bool *is_serif, bool *is_smallcaps,
-                                 int *pointsize, int *font_id) const;
-
-  // Return the name of the language used to recognize this word.
-  // On error, nullptr.  Do not delete this pointer.
-  const char *WordRecognitionLanguage() const;
-
-  // Return the overall directionality of this word.
-  StrongScriptDirection WordDirection() const;
-
-  // Returns true if the current word was found in a dictionary.
-  bool WordIsFromDictionary() const;
-
-  // Returns the number of blanks before the current word.
-  int BlanksBeforeWord() const;
-
-  // Returns true if the current word is numeric.
-  bool WordIsNumeric() const;
-
-  // Returns true if the word contains blamer information.
-  bool HasBlamerInfo() const;
-
-  // Returns the pointer to ParamsTrainingBundle stored in the BlamerBundle
-  // of the current word.
-  const void *GetParamsTrainingBundle() const;
-
-  // Returns a pointer to the string with blamer information for this word.
-  // Assumes that the word's blamer_bundle is not nullptr.
-  const char *GetBlamerDebug() const;
-
-  // Returns a pointer to the string with misadaption information for this word.
-  // Assumes that the word's blamer_bundle is not nullptr.
-  const char *GetBlamerMisadaptionDebug() const;
-
-  // Returns true if a truth string was recorded for the current word.
-  bool HasTruthString() const;
-
-  // Returns true if the given string is equivalent to the truth string for
-  // the current word.
-  bool EquivalentToTruth(const char *str) const;
-
-  // Returns a null terminated UTF-8 encoded truth string for the current word.
-  // Use delete [] to free after use.
-  char *WordTruthUTF8Text() const;
-
-  // Returns a null terminated UTF-8 encoded normalized OCR string for the
-  // current word. Use delete [] to free after use.
-  char *WordNormedUTF8Text() const;
-
-  // Returns a pointer to serialized choice lattice.
-  // Fills lattice_size with the number of bytes in lattice data.
-  const char *WordLattice(int *lattice_size) const;
-
-  // ============= Functions that refer to symbols only ============.
-
-  // Returns true if the current symbol is a superscript.
-  // If iterating at a higher level object than symbols, eg words, then
-  // this will return the attributes of the first symbol in that word.
-  bool SymbolIsSuperscript() const;
-  // Returns true if the current symbol is a subscript.
-  // If iterating at a higher level object than symbols, eg words, then
-  // this will return the attributes of the first symbol in that word.
-  bool SymbolIsSubscript() const;
-  // Returns true if the current symbol is a dropcap.
-  // If iterating at a higher level object than symbols, eg words, then
-  // this will return the attributes of the first symbol in that word.
-  bool SymbolIsDropcap() const;
-
-protected:
-  const char *line_separator_;
-  const char *paragraph_separator_;
-};
-
-// Class to iterate over the classifier choices for a single RIL_SYMBOL.
-class TESS_API ChoiceIterator {
-public:
-  // Construction is from a LTRResultIterator that points to the symbol of
-  // interest. The ChoiceIterator allows a one-shot iteration over the
-  // choices for this symbol and after that it is useless.
-  explicit ChoiceIterator(const LTRResultIterator &result_it);
-  ~ChoiceIterator();
-
-  // Moves to the next choice for the symbol and returns false if there
-  // are none left.
-  bool Next();
-
-  // ============= Accessing data ==============.
-
-  // Returns the null terminated UTF-8 encoded text string for the current
-  // choice.
-  // NOTE: Unlike LTRResultIterator::GetUTF8Text, the return points to an
-  // internal structure and should NOT be delete[]ed to free after use.
-  const char *GetUTF8Text() const;
-
-  // Returns the confidence of the current choice depending on the used language
-  // data. If only LSTM traineddata is used the value range is 0.0f - 1.0f. All
-  // choices for one symbol should roughly add up to 1.0f.
-  // If only traineddata of the legacy engine is used, the number should be
-  // interpreted as a percent probability. (0.0f-100.0f) In this case
-  // probabilities won't add up to 100. Each one stands on its own.
-  float Confidence() const;
-
-  // Returns a vector containing all timesteps, which belong to the currently
-  // selected symbol. A timestep is a vector containing pairs of symbols and
-  // floating point numbers. The number states the probability for the
-  // corresponding symbol.
-  std::vector<std::vector<std::pair<const char *, float>>> *Timesteps() const;
-
-private:
-  // clears the remaining spaces out of the results and adapt the probabilities
-  void filterSpaces();
-  // Pointer to the WERD_RES object owned by the API.
-  WERD_RES *word_res_;
-  // Iterator over the blob choices.
-  BLOB_CHOICE_IT *choice_it_;
-  std::vector<std::pair<const char *, float>> *LSTM_choices_ = nullptr;
-  std::vector<std::pair<const char *, float>>::iterator LSTM_choice_it_;
-
-  const int *tstep_index_;
-  // regulates the rating granularity
-  double rating_coefficient_;
-  // leading blanks
-  int blanks_before_word_;
-  // true when there is lstm engine related trained data
-  bool oemLSTM_;
-};
-
-} // namespace tesseract.
-
-#endif // TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H_
--- a/third_party/ocr/tesseract-ocr/uos/aarch64/include/tesseract/ocrclass.h
+++ b/third_party/ocr/tesseract-ocr/uos/aarch64/include/tesseract/ocrclass.h
@ -1,158 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-/**********************************************************************
- * File:        ocrclass.h
- * Description: Class definitions and constants for the OCR API.
- * Author:      Hewlett-Packard Co
- *
- * (C) Copyright 1996, Hewlett-Packard Co.
- ** Licensed under the Apache License, Version 2.0 (the "License");
- ** you may not use this file except in compliance with the License.
- ** You may obtain a copy of the License at
- ** http://www.apache.org/licenses/LICENSE-2.0
- ** Unless required by applicable law or agreed to in writing, software
- ** distributed under the License is distributed on an "AS IS" BASIS,
- ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- ** See the License for the specific language governing permissions and
- ** limitations under the License.
- *
- **********************************************************************/
-
-/**********************************************************************
- * This file contains typedefs for all the structures used by
- * the HP OCR interface.
- * The structures are designed to allow them to be used with any
- * structure alignment up to 8.
- **********************************************************************/
-
-#ifndef CCUTIL_OCRCLASS_H_
-#define CCUTIL_OCRCLASS_H_
-
-#include <chrono>
-#include <ctime>
-
-namespace tesseract {
-
-/**********************************************************************
- * EANYCODE_CHAR
- * Description of a single character. The character code is defined by
- * the character set of the current font.
- * Output text is sent as an array of these structures.
- * Spaces and line endings in the output are represented in the
- * structures of the surrounding characters. They are not directly
- * represented as characters.
- * The first character in a word has a positive value of blanks.
- * Missing information should be set to the defaults in the comments.
- * If word bounds are known, but not character bounds, then the top and
- * bottom of each character should be those of the word. The left of the
- * first and right of the last char in each word should be set. All other
- * lefts and rights should be set to -1.
- * If set, the values of right and bottom are left+width and top+height.
- * Most of the members come directly from the parameters to ocr_append_char.
- * The formatting member uses the enhancement parameter and combines the
- * line direction stuff into the top 3 bits.
- * The coding is 0=RL char, 1=LR char, 2=DR NL, 3=UL NL, 4=DR Para,
- * 5=UL Para, 6=TB char, 7=BT char. API users do not need to know what
- * the coding is, only that it is backwards compatible with the previous
- * version.
- **********************************************************************/
-
-struct EANYCODE_CHAR { /*single character */
-  // It should be noted that the format for char_code for version 2.0 and beyond
-  // is UTF8 which means that ASCII characters will come out as one structure
-  // but other characters will be returned in two or more instances of this
-  // structure with a single byte of the  UTF8 code in each, but each will have
-  // the same bounding box. Programs which want to handle languagues with
-  // different characters sets will need to handle extended characters
-  // appropriately, but *all* code needs to be prepared to receive UTF8 coded
-  // characters for characters such as bullet and fancy quotes.
-  uint16_t char_code; /*character itself */
-  int16_t left;       /*of char (-1) */
-  int16_t right;      /*of char (-1) */
-  int16_t top;        /*of char (-1) */
-  int16_t bottom;     /*of char (-1) */
-  int16_t font_index; /*what font (0) */
-  uint8_t confidence; /*0=perfect, 100=reject (0/100) */
-  uint8_t point_size; /*of char, 72=i inch, (10) */
-  int8_t blanks;      /*no of spaces before this char (1) */
-  uint8_t formatting; /*char formatting (0) */
-};
-
-/**********************************************************************
- * ETEXT_DESC
- * Description of the output of the OCR engine.
- * This structure is used as both a progress monitor and the final
- * output header, since it needs to be a valid progress monitor while
- * the OCR engine is storing its output to shared memory.
- * During progress, all the buffer info is -1.
- * Progress starts at 0 and increases to 100 during OCR. No other constraint.
- * Additionally the progress callback contains the bounding box of the word that
- * is currently being processed.
- * Every progress callback, the OCR engine must set ocr_alive to 1.
- * The HP side will set ocr_alive to 0. Repeated failure to reset
- * to 1 indicates that the OCR engine is dead.
- * If the cancel function is not null then it is called with the number of
- * user words found. If it returns true then operation is cancelled.
- **********************************************************************/
-class ETEXT_DESC;
-
-using CANCEL_FUNC = bool (*)(void *, int);
-using PROGRESS_FUNC = bool (*)(int, int, int, int, int);
-using PROGRESS_FUNC2 = bool (*)(ETEXT_DESC *, int, int, int, int);
-
-class ETEXT_DESC { // output header
-public:
-  int16_t count{0};    /// chars in this buffer(0)
-  int16_t progress{0}; /// percent complete increasing (0-100)
-  /** Progress monitor covers word recognition and it does not cover layout
-   * analysis.
-   * See Ray comment in https://github.com/tesseract-ocr/tesseract/pull/27 */
-  int8_t more_to_come{0};       /// true if not last
-  volatile int8_t ocr_alive{0}; /// ocr sets to 1, HP 0
-  int8_t err_code{0};           /// for errcode use
-  CANCEL_FUNC cancel{nullptr};  /// returns true to cancel
-  PROGRESS_FUNC progress_callback{
-      nullptr};                      /// called whenever progress increases
-  PROGRESS_FUNC2 progress_callback2; /// monitor-aware progress callback
-  void *cancel_this{nullptr};        /// this or other data for cancel
-  std::chrono::steady_clock::time_point end_time;
-  /// Time to stop. Expected to be set only
-  /// by call to set_deadline_msecs().
-  EANYCODE_CHAR text[1]{}; /// character data
-
-  ETEXT_DESC() : progress_callback2(&default_progress_func) {
-    end_time = std::chrono::time_point<std::chrono::steady_clock,
-                                       std::chrono::milliseconds>();
-  }
-
-  // Sets the end time to be deadline_msecs milliseconds from now.
-  void set_deadline_msecs(int32_t deadline_msecs) {
-    if (deadline_msecs > 0) {
-      end_time = std::chrono::steady_clock::now() +
-                 std::chrono::milliseconds(deadline_msecs);
-    }
-  }
-
-  // Returns false if we've not passed the end_time, or have not set a deadline.
-  bool deadline_exceeded() const {
-    if (end_time.time_since_epoch() ==
-        std::chrono::steady_clock::duration::zero()) {
-      return false;
-    }
-    auto now = std::chrono::steady_clock::now();
-    return (now > end_time);
-  }
-
-private:
-  static bool default_progress_func(ETEXT_DESC *ths, int left, int right,
-                                    int top, int bottom) {
-    if (ths->progress_callback != nullptr) {
-      return (*(ths->progress_callback))(ths->progress, left, right, top,
-                                         bottom);
-    }
-    return true;
-  }
-};
-
-} // namespace tesseract
-
-#endif // CCUTIL_OCRCLASS_H_
--- a/third_party/ocr/tesseract-ocr/uos/aarch64/include/tesseract/osdetect.h
+++ b/third_party/ocr/tesseract-ocr/uos/aarch64/include/tesseract/osdetect.h
@ -1,139 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// File:        osdetect.h
-// Description: Orientation and script detection.
-// Author:      Samuel Charron
-//              Ranjith Unnikrishnan
-//
-// (C) Copyright 2008, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef TESSERACT_CCMAIN_OSDETECT_H_
-#define TESSERACT_CCMAIN_OSDETECT_H_
-
-#include "export.h" // for TESS_API
-
-#include <vector> // for std::vector
-
-namespace tesseract {
-
-class BLOBNBOX;
-class BLOBNBOX_CLIST;
-class BLOB_CHOICE_LIST;
-class TO_BLOCK_LIST;
-class UNICHARSET;
-
-class Tesseract;
-
-// Max number of scripts in ICU + "NULL" + Japanese and Korean + Fraktur
-const int kMaxNumberOfScripts = 116 + 1 + 2 + 1;
-
-struct OSBestResult {
-  OSBestResult()
-      : orientation_id(0), script_id(0), sconfidence(0.0), oconfidence(0.0) {}
-  int orientation_id;
-  int script_id;
-  float sconfidence;
-  float oconfidence;
-};
-
-struct OSResults {
-  OSResults() : unicharset(nullptr) {
-    for (int i = 0; i < 4; ++i) {
-      for (int j = 0; j < kMaxNumberOfScripts; ++j) {
-        scripts_na[i][j] = 0;
-      }
-      orientations[i] = 0;
-    }
-  }
-  void update_best_orientation();
-  // Set the estimate of the orientation to the given id.
-  void set_best_orientation(int orientation_id);
-  // Update/Compute the best estimate of the script assuming the given
-  // orientation id.
-  void update_best_script(int orientation_id);
-  // Return the index of the script with the highest score for this orientation.
-  TESS_API int get_best_script(int orientation_id) const;
-  // Accumulate scores with given OSResults instance and update the best script.
-  void accumulate(const OSResults &osr);
-
-  // Print statistics.
-  void print_scores(void) const;
-  void print_scores(int orientation_id) const;
-
-  // Array holding scores for each orientation id [0,3].
-  // Orientation ids [0..3] map to [0, 270, 180, 90] degree orientations of the
-  // page respectively, where the values refer to the amount of clockwise
-  // rotation to be applied to the page for the text to be upright and readable.
-  float orientations[4];
-  // Script confidence scores for each of 4 possible orientations.
-  float scripts_na[4][kMaxNumberOfScripts];
-
-  UNICHARSET *unicharset;
-  OSBestResult best_result;
-};
-
-class OrientationDetector {
-public:
-  OrientationDetector(const std::vector<int> *allowed_scripts,
-                      OSResults *results);
-  bool detect_blob(BLOB_CHOICE_LIST *scores);
-  int get_orientation();
-
-private:
-  OSResults *osr_;
-  const std::vector<int> *allowed_scripts_;
-};
-
-class ScriptDetector {
-public:
-  ScriptDetector(const std::vector<int> *allowed_scripts, OSResults *osr,
-                 tesseract::Tesseract *tess);
-  void detect_blob(BLOB_CHOICE_LIST *scores);
-  bool must_stop(int orientation) const;
-
-private:
-  OSResults *osr_;
-  static const char *korean_script_;
-  static const char *japanese_script_;
-  static const char *fraktur_script_;
-  int korean_id_;
-  int japanese_id_;
-  int katakana_id_;
-  int hiragana_id_;
-  int han_id_;
-  int hangul_id_;
-  int latin_id_;
-  int fraktur_id_;
-  tesseract::Tesseract *tess_;
-  const std::vector<int> *allowed_scripts_;
-};
-
-int orientation_and_script_detection(const char *filename, OSResults *,
-                                     tesseract::Tesseract *);
-
-int os_detect(TO_BLOCK_LIST *port_blocks, OSResults *osr,
-              tesseract::Tesseract *tess);
-
-int os_detect_blobs(const std::vector<int> *allowed_scripts,
-                    BLOBNBOX_CLIST *blob_list, OSResults *osr,
-                    tesseract::Tesseract *tess);
-
-bool os_detect_blob(BLOBNBOX *bbox, OrientationDetector *o, ScriptDetector *s,
-                    OSResults *, tesseract::Tesseract *tess);
-
-// Helper method to convert an orientation index to its value in degrees.
-// The value represents the amount of clockwise rotation in degrees that must be
-// applied for the text to be upright (readable).
-TESS_API int OrientationIdToValue(const int &id);
-
-} // namespace tesseract
-
-#endif // TESSERACT_CCMAIN_OSDETECT_H_
--- a/third_party/ocr/tesseract-ocr/uos/aarch64/include/tesseract/pageiterator.h
+++ b/third_party/ocr/tesseract-ocr/uos/aarch64/include/tesseract/pageiterator.h
@ -1,364 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// File:        pageiterator.h
-// Description: Iterator for tesseract page structure that avoids using
-//              tesseract internal data structures.
-// Author:      Ray Smith
-//
-// (C) Copyright 2010, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef TESSERACT_CCMAIN_PAGEITERATOR_H_
-#define TESSERACT_CCMAIN_PAGEITERATOR_H_
-
-#include "export.h"
-#include "publictypes.h"
-
-struct Pix;
-struct Pta;
-
-namespace tesseract {
-
-struct BlamerBundle;
-class C_BLOB_IT;
-class PAGE_RES;
-class PAGE_RES_IT;
-class WERD;
-
-class Tesseract;
-
-/**
- * Class to iterate over tesseract page structure, providing access to all
- * levels of the page hierarchy, without including any tesseract headers or
- * having to handle any tesseract structures.
- * WARNING! This class points to data held within the TessBaseAPI class, and
- * therefore can only be used while the TessBaseAPI class still exists and
- * has not been subjected to a call of Init, SetImage, Recognize, Clear, End
- * DetectOS, or anything else that changes the internal PAGE_RES.
- * See tesseract/publictypes.h for the definition of PageIteratorLevel.
- * See also ResultIterator, derived from PageIterator, which adds in the
- * ability to access OCR output with text-specific methods.
- */
-
-class TESS_API PageIterator {
-public:
-  /**
-   * page_res and tesseract come directly from the BaseAPI.
-   * The rectangle parameters are copied indirectly from the Thresholder,
-   * via the BaseAPI. They represent the coordinates of some rectangle in an
-   * original image (in top-left-origin coordinates) and therefore the top-left
-   * needs to be added to any output boxes in order to specify coordinates
-   * in the original image. See TessBaseAPI::SetRectangle.
-   * The scale and scaled_yres are in case the Thresholder scaled the image
-   * rectangle prior to thresholding. Any coordinates in tesseract's image
-   * must be divided by scale before adding (rect_left, rect_top).
-   * The scaled_yres indicates the effective resolution of the binary image
-   * that tesseract has been given by the Thresholder.
-   * After the constructor, Begin has already been called.
-   */
-  PageIterator(PAGE_RES *page_res, Tesseract *tesseract, int scale,
-               int scaled_yres, int rect_left, int rect_top, int rect_width,
-               int rect_height);
-  virtual ~PageIterator();
-
-  /**
-   * Page/ResultIterators may be copied! This makes it possible to iterate over
-   * all the objects at a lower level, while maintaining an iterator to
-   * objects at a higher level. These constructors DO NOT CALL Begin, so
-   * iterations will continue from the location of src.
-   */
-  PageIterator(const PageIterator &src);
-  const PageIterator &operator=(const PageIterator &src);
-
-  /** Are we positioned at the same location as other? */
-  bool PositionedAtSameWord(const PAGE_RES_IT *other) const;
-
-  // ============= Moving around within the page ============.
-
-  /**
-   * Moves the iterator to point to the start of the page to begin an
-   * iteration.
-   */
-  virtual void Begin();
-
-  /**
-   * Moves the iterator to the beginning of the paragraph.
-   * This class implements this functionality by moving it to the zero indexed
-   * blob of the first (leftmost) word on the first row of the paragraph.
-   */
-  virtual void RestartParagraph();
-
-  /**
-   * Return whether this iterator points anywhere in the first textline of a
-   * paragraph.
-   */
-  bool IsWithinFirstTextlineOfParagraph() const;
-
-  /**
-   * Moves the iterator to the beginning of the text line.
-   * This class implements this functionality by moving it to the zero indexed
-   * blob of the first (leftmost) word of the row.
-   */
-  virtual void RestartRow();
-
-  /**
-   * Moves to the start of the next object at the given level in the
-   * page hierarchy, and returns false if the end of the page was reached.
-   * NOTE that RIL_SYMBOL will skip non-text blocks, but all other
-   * PageIteratorLevel level values will visit each non-text block once.
-   * Think of non text blocks as containing a single para, with a single line,
-   * with a single imaginary word.
-   * Calls to Next with different levels may be freely intermixed.
-   * This function iterates words in right-to-left scripts correctly, if
-   * the appropriate language has been loaded into Tesseract.
-   */
-  virtual bool Next(PageIteratorLevel level);
-
-  /**
-   * Returns true if the iterator is at the start of an object at the given
-   * level.
-   *
-   * For instance, suppose an iterator it is pointed to the first symbol of the
-   * first word of the third line of the second paragraph of the first block in
-   * a page, then:
-   *   it.IsAtBeginningOf(RIL_BLOCK) = false
-   *   it.IsAtBeginningOf(RIL_PARA) = false
-   *   it.IsAtBeginningOf(RIL_TEXTLINE) = true
-   *   it.IsAtBeginningOf(RIL_WORD) = true
-   *   it.IsAtBeginningOf(RIL_SYMBOL) = true
-   */
-  virtual bool IsAtBeginningOf(PageIteratorLevel level) const;
-
-  /**
-   * Returns whether the iterator is positioned at the last element in a
-   * given level. (e.g. the last word in a line, the last line in a block)
-   *
-   *     Here's some two-paragraph example
-   *   text.  It starts off innocuously
-   *   enough but quickly turns bizarre.
-   *     The author inserts a cornucopia
-   *   of words to guard against confused
-   *   references.
-   *
-   * Now take an iterator it pointed to the start of "bizarre."
-   *  it.IsAtFinalElement(RIL_PARA, RIL_SYMBOL) = false
-   *  it.IsAtFinalElement(RIL_PARA, RIL_WORD) = true
-   *  it.IsAtFinalElement(RIL_BLOCK, RIL_WORD) = false
-   */
-  virtual bool IsAtFinalElement(PageIteratorLevel level,
-                                PageIteratorLevel element) const;
-
-  /**
-   * Returns whether this iterator is positioned
-   *   before other:   -1
-   *   equal to other:  0
-   *   after other:     1
-   */
-  int Cmp(const PageIterator &other) const;
-
-  // ============= Accessing data ==============.
-  // Coordinate system:
-  // Integer coordinates are at the cracks between the pixels.
-  // The top-left corner of the top-left pixel in the image is at (0,0).
-  // The bottom-right corner of the bottom-right pixel in the image is at
-  // (width, height).
-  // Every bounding box goes from the top-left of the top-left contained
-  // pixel to the bottom-right of the bottom-right contained pixel, so
-  // the bounding box of the single top-left pixel in the image is:
-  // (0,0)->(1,1).
-  // If an image rectangle has been set in the API, then returned coordinates
-  // relate to the original (full) image, rather than the rectangle.
-
-  /**
-   * Controls what to include in a bounding box. Bounding boxes of all levels
-   * between RIL_WORD and RIL_BLOCK can include or exclude potential diacritics.
-   * Between layout analysis and recognition, it isn't known where all
-   * diacritics belong, so this control is used to include or exclude some
-   * diacritics that are above or below the main body of the word. In most cases
-   * where the placement is obvious, and after recognition, it doesn't make as
-   * much difference, as the diacritics will already be included in the word.
-   */
-  void SetBoundingBoxComponents(bool include_upper_dots,
-                                bool include_lower_dots) {
-    include_upper_dots_ = include_upper_dots;
-    include_lower_dots_ = include_lower_dots;
-  }
-
-  /**
-   * Returns the bounding rectangle of the current object at the given level.
-   * See comment on coordinate system above.
-   * Returns false if there is no such object at the current position.
-   * The returned bounding box is guaranteed to match the size and position
-   * of the image returned by GetBinaryImage, but may clip foreground pixels
-   * from a grey image. The padding argument to GetImage can be used to expand
-   * the image to include more foreground pixels. See GetImage below.
-   */
-  bool BoundingBox(PageIteratorLevel level, int *left, int *top, int *right,
-                   int *bottom) const;
-  bool BoundingBox(PageIteratorLevel level, int padding, int *left, int *top,
-                   int *right, int *bottom) const;
-  /**
-   * Returns the bounding rectangle of the object in a coordinate system of the
-   * working image rectangle having its origin at (rect_left_, rect_top_) with
-   * respect to the original image and is scaled by a factor scale_.
-   */
-  bool BoundingBoxInternal(PageIteratorLevel level, int *left, int *top,
-                           int *right, int *bottom) const;
-
-  /** Returns whether there is no object of a given level. */
-  bool Empty(PageIteratorLevel level) const;
-
-  /**
-   * Returns the type of the current block.
-   * See tesseract/publictypes.h for PolyBlockType.
-   */
-  PolyBlockType BlockType() const;
-
-  /**
-   * Returns the polygon outline of the current block. The returned Pta must
-   * be ptaDestroy-ed after use. Note that the returned Pta lists the vertices
-   * of the polygon, and the last edge is the line segment between the last
-   * point and the first point. nullptr will be returned if the iterator is
-   * at the end of the document or layout analysis was not used.
-   */
-  Pta *BlockPolygon() const;
-
-  /**
-   * Returns a binary image of the current object at the given level.
-   * The position and size match the return from BoundingBoxInternal, and so
-   * this could be upscaled with respect to the original input image.
-   * Use pixDestroy to delete the image after use.
-   */
-  Pix *GetBinaryImage(PageIteratorLevel level) const;
-
-  /**
-   * Returns an image of the current object at the given level in greyscale
-   * if available in the input. To guarantee a binary image use BinaryImage.
-   * NOTE that in order to give the best possible image, the bounds are
-   * expanded slightly over the binary connected component, by the supplied
-   * padding, so the top-left position of the returned image is returned
-   * in (left,top). These will most likely not match the coordinates
-   * returned by BoundingBox.
-   * If you do not supply an original image, you will get a binary one.
-   * Use pixDestroy to delete the image after use.
-   */
-  Pix *GetImage(PageIteratorLevel level, int padding, Pix *original_img,
-                int *left, int *top) const;
-
-  /**
-   * Returns the baseline of the current object at the given level.
-   * The baseline is the line that passes through (x1, y1) and (x2, y2).
-   * WARNING: with vertical text, baselines may be vertical!
-   * Returns false if there is no baseline at the current position.
-   */
-  bool Baseline(PageIteratorLevel level, int *x1, int *y1, int *x2,
-                int *y2) const;
-
-  // Returns the attributes of the current row.
-  void RowAttributes(float *row_height, float *descenders,
-                     float *ascenders) const;
-
-  /**
-   * Returns orientation for the block the iterator points to.
-   *   orientation, writing_direction, textline_order: see publictypes.h
-   *   deskew_angle: after rotating the block so the text orientation is
-   *                 upright, how many radians does one have to rotate the
-   *                 block anti-clockwise for it to be level?
-   *                   -Pi/4 <= deskew_angle <= Pi/4
-   */
-  void Orientation(tesseract::Orientation *orientation,
-                   tesseract::WritingDirection *writing_direction,
-                   tesseract::TextlineOrder *textline_order,
-                   float *deskew_angle) const;
-
-  /**
-   * Returns information about the current paragraph, if available.
-   *
-   *   justification -
-   *     LEFT if ragged right, or fully justified and script is left-to-right.
-   *     RIGHT if ragged left, or fully justified and script is right-to-left.
-   *     unknown if it looks like source code or we have very few lines.
-   *   is_list_item -
-   *     true if we believe this is a member of an ordered or unordered list.
-   *   is_crown -
-   *     true if the first line of the paragraph is aligned with the other
-   *     lines of the paragraph even though subsequent paragraphs have first
-   *     line indents.  This typically indicates that this is the continuation
-   *     of a previous paragraph or that it is the very first paragraph in
-   *     the chapter.
-   *   first_line_indent -
-   *     For LEFT aligned paragraphs, the first text line of paragraphs of
-   *     this kind are indented this many pixels from the left edge of the
-   *     rest of the paragraph.
-   *     for RIGHT aligned paragraphs, the first text line of paragraphs of
-   *     this kind are indented this many pixels from the right edge of the
-   *     rest of the paragraph.
-   *     NOTE 1: This value may be negative.
-   *     NOTE 2: if *is_crown == true, the first line of this paragraph is
-   *             actually flush, and first_line_indent is set to the "common"
-   *             first_line_indent for subsequent paragraphs in this block
-   *             of text.
-   */
-  void ParagraphInfo(tesseract::ParagraphJustification *justification,
-                     bool *is_list_item, bool *is_crown,
-                     int *first_line_indent) const;
-
-  // If the current WERD_RES (it_->word()) is not nullptr, sets the BlamerBundle
-  // of the current word to the given pointer (takes ownership of the pointer)
-  // and returns true.
-  // Can only be used when iterating on the word level.
-  bool SetWordBlamerBundle(BlamerBundle *blamer_bundle);
-
-protected:
-  /**
-   * Sets up the internal data for iterating the blobs of a new word, then
-   * moves the iterator to the given offset.
-   */
-  void BeginWord(int offset);
-
-  /** Pointer to the page_res owned by the API. */
-  PAGE_RES *page_res_;
-  /** Pointer to the Tesseract object owned by the API. */
-  Tesseract *tesseract_;
-  /**
-   * The iterator to the page_res_. Owned by this ResultIterator.
-   * A pointer just to avoid dragging in Tesseract includes.
-   */
-  PAGE_RES_IT *it_;
-  /**
-   * The current input WERD being iterated. If there is an output from OCR,
-   * then word_ is nullptr. Owned by the API
-   */
-  WERD *word_;
-  /** The length of the current word_. */
-  int word_length_;
-  /** The current blob index within the word. */
-  int blob_index_;
-  /**
-   * Iterator to the blobs within the word. If nullptr, then we are iterating
-   * OCR results in the box_word.
-   * Owned by this ResultIterator.
-   */
-  C_BLOB_IT *cblob_it_;
-  /** Control over what to include in bounding boxes. */
-  bool include_upper_dots_;
-  bool include_lower_dots_;
-  /** Parameters saved from the Thresholder. Needed to rebuild coordinates.*/
-  int scale_;
-  int scaled_yres_;
-  int rect_left_;
-  int rect_top_;
-  int rect_width_;
-  int rect_height_;
-};
-
-} // namespace tesseract.
-
-#endif // TESSERACT_CCMAIN_PAGEITERATOR_H_
--- a/third_party/ocr/tesseract-ocr/uos/aarch64/include/tesseract/publictypes.h
+++ b/third_party/ocr/tesseract-ocr/uos/aarch64/include/tesseract/publictypes.h
@ -1,281 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// File:        publictypes.h
-// Description: Types used in both the API and internally
-// Author:      Ray Smith
-//
-// (C) Copyright 2010, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef TESSERACT_CCSTRUCT_PUBLICTYPES_H_
-#define TESSERACT_CCSTRUCT_PUBLICTYPES_H_
-
-namespace tesseract {
-
-// This file contains types that are used both by the API and internally
-// to Tesseract. In order to decouple the API from Tesseract and prevent cyclic
-// dependencies, THIS FILE SHOULD NOT DEPEND ON ANY OTHER PART OF TESSERACT.
-// Restated: It is OK for low-level Tesseract files to include publictypes.h,
-// but not for the low-level tesseract code to include top-level API code.
-// This file should not use other Tesseract types, as that would drag
-// their includes into the API-level.
-
-/** Number of printers' points in an inch. The unit of the pointsize return. */
-constexpr int kPointsPerInch = 72;
-/**
- * Minimum believable resolution. Used as a default if there is no other
- * information, as it is safer to under-estimate than over-estimate.
- */
-constexpr int kMinCredibleResolution = 70;
-/** Maximum believable resolution.  */
-constexpr int kMaxCredibleResolution = 2400;
-/**
- * Ratio between median blob size and likely resolution. Used to estimate
- * resolution when none is provided. This is basically 1/usual text size in
- * inches.  */
-constexpr int kResolutionEstimationFactor = 10;
-
-/**
- * Possible types for a POLY_BLOCK or ColPartition.
- * Must be kept in sync with kPBColors in polyblk.cpp and PTIs*Type functions
- * below, as well as kPolyBlockNames in layout_test.cc.
- * Used extensively by ColPartition, and POLY_BLOCK.
- */
-enum PolyBlockType {
-  PT_UNKNOWN,         // Type is not yet known. Keep as the first element.
-  PT_FLOWING_TEXT,    // Text that lives inside a column.
-  PT_HEADING_TEXT,    // Text that spans more than one column.
-  PT_PULLOUT_TEXT,    // Text that is in a cross-column pull-out region.
-  PT_EQUATION,        // Partition belonging to an equation region.
-  PT_INLINE_EQUATION, // Partition has inline equation.
-  PT_TABLE,           // Partition belonging to a table region.
-  PT_VERTICAL_TEXT,   // Text-line runs vertically.
-  PT_CAPTION_TEXT,    // Text that belongs to an image.
-  PT_FLOWING_IMAGE,   // Image that lives inside a column.
-  PT_HEADING_IMAGE,   // Image that spans more than one column.
-  PT_PULLOUT_IMAGE,   // Image that is in a cross-column pull-out region.
-  PT_HORZ_LINE,       // Horizontal Line.
-  PT_VERT_LINE,       // Vertical Line.
-  PT_NOISE,           // Lies outside of any column.
-  PT_COUNT
-};
-
-/** Returns true if PolyBlockType is of horizontal line type */
-inline bool PTIsLineType(PolyBlockType type) {
-  return type == PT_HORZ_LINE || type == PT_VERT_LINE;
-}
-/** Returns true if PolyBlockType is of image type */
-inline bool PTIsImageType(PolyBlockType type) {
-  return type == PT_FLOWING_IMAGE || type == PT_HEADING_IMAGE ||
-         type == PT_PULLOUT_IMAGE;
-}
-/** Returns true if PolyBlockType is of text type */
-inline bool PTIsTextType(PolyBlockType type) {
-  return type == PT_FLOWING_TEXT || type == PT_HEADING_TEXT ||
-         type == PT_PULLOUT_TEXT || type == PT_TABLE ||
-         type == PT_VERTICAL_TEXT || type == PT_CAPTION_TEXT ||
-         type == PT_INLINE_EQUATION;
-}
-// Returns true if PolyBlockType is of pullout(inter-column) type
-inline bool PTIsPulloutType(PolyBlockType type) {
-  return type == PT_PULLOUT_IMAGE || type == PT_PULLOUT_TEXT;
-}
-
-/**
- *  +------------------+  Orientation Example:
- *  | 1 Aaaa Aaaa Aaaa |  ====================
- *  | Aaa aa aaa aa    |  To left is a diagram of some (1) English and
- *  | aaaaaa A aa aaa. |  (2) Chinese text and a (3) photo credit.
- *  |                2 |
- *  |   #######  c c C |  Upright Latin characters are represented as A and a.
- *  |   #######  c c c |  '<' represents a latin character rotated
- *  | < #######  c c c |      anti-clockwise 90 degrees.
- *  | < #######  c   c |
- *  | < #######  .   c |  Upright Chinese characters are represented C and c.
- *  | 3 #######      c |
- *  +------------------+  NOTA BENE: enum values here should match goodoc.proto
-
- * If you orient your head so that "up" aligns with Orientation,
- * then the characters will appear "right side up" and readable.
- *
- * In the example above, both the English and Chinese paragraphs are oriented
- * so their "up" is the top of the page (page up).  The photo credit is read
- * with one's head turned leftward ("up" is to page left).
- *
- * The values of this enum match the convention of Tesseract's osdetect.h
-*/
-enum Orientation {
-  ORIENTATION_PAGE_UP = 0,
-  ORIENTATION_PAGE_RIGHT = 1,
-  ORIENTATION_PAGE_DOWN = 2,
-  ORIENTATION_PAGE_LEFT = 3,
-};
-
-/**
- * The grapheme clusters within a line of text are laid out logically
- * in this direction, judged when looking at the text line rotated so that
- * its Orientation is "page up".
- *
- * For English text, the writing direction is left-to-right.  For the
- * Chinese text in the above example, the writing direction is top-to-bottom.
- */
-enum WritingDirection {
-  WRITING_DIRECTION_LEFT_TO_RIGHT = 0,
-  WRITING_DIRECTION_RIGHT_TO_LEFT = 1,
-  WRITING_DIRECTION_TOP_TO_BOTTOM = 2,
-};
-
-/**
- * The text lines are read in the given sequence.
- *
- * In English, the order is top-to-bottom.
- * In Chinese, vertical text lines are read right-to-left.  Mongolian is
- * written in vertical columns top to bottom like Chinese, but the lines
- * order left-to right.
- *
- * Note that only some combinations make sense.  For example,
- * WRITING_DIRECTION_LEFT_TO_RIGHT implies TEXTLINE_ORDER_TOP_TO_BOTTOM
- */
-enum TextlineOrder {
-  TEXTLINE_ORDER_LEFT_TO_RIGHT = 0,
-  TEXTLINE_ORDER_RIGHT_TO_LEFT = 1,
-  TEXTLINE_ORDER_TOP_TO_BOTTOM = 2,
-};
-
-/**
- * Possible modes for page layout analysis. These *must* be kept in order
- * of decreasing amount of layout analysis to be done, except for OSD_ONLY,
- * so that the inequality test macros below work.
- */
-enum PageSegMode {
-  PSM_OSD_ONLY = 0,      ///< Orientation and script detection only.
-  PSM_AUTO_OSD = 1,      ///< Automatic page segmentation with orientation and
-                         ///< script detection. (OSD)
-  PSM_AUTO_ONLY = 2,     ///< Automatic page segmentation, but no OSD, or OCR.
-  PSM_AUTO = 3,          ///< Fully automatic page segmentation, but no OSD.
-  PSM_SINGLE_COLUMN = 4, ///< Assume a single column of text of variable sizes.
-  PSM_SINGLE_BLOCK_VERT_TEXT = 5, ///< Assume a single uniform block of
-                                  ///< vertically aligned text.
-  PSM_SINGLE_BLOCK = 6, ///< Assume a single uniform block of text. (Default.)
-  PSM_SINGLE_LINE = 7,  ///< Treat the image as a single text line.
-  PSM_SINGLE_WORD = 8,  ///< Treat the image as a single word.
-  PSM_CIRCLE_WORD = 9,  ///< Treat the image as a single word in a circle.
-  PSM_SINGLE_CHAR = 10, ///< Treat the image as a single character.
-  PSM_SPARSE_TEXT =
-      11, ///< Find as much text as possible in no particular order.
-  PSM_SPARSE_TEXT_OSD = 12, ///< Sparse text with orientation and script det.
-  PSM_RAW_LINE = 13, ///< Treat the image as a single text line, bypassing
-                     ///< hacks that are Tesseract-specific.
-
-  PSM_COUNT ///< Number of enum entries.
-};
-
-/**
- * Inline functions that act on a PageSegMode to determine whether components of
- * layout analysis are enabled.
- * *Depend critically on the order of elements of PageSegMode.*
- * NOTE that arg is an int for compatibility with INT_PARAM.
- */
-inline bool PSM_OSD_ENABLED(int pageseg_mode) {
-  return pageseg_mode <= PSM_AUTO_OSD || pageseg_mode == PSM_SPARSE_TEXT_OSD;
-}
-inline bool PSM_ORIENTATION_ENABLED(int pageseg_mode) {
-  return pageseg_mode <= PSM_AUTO || pageseg_mode == PSM_SPARSE_TEXT_OSD;
-}
-inline bool PSM_COL_FIND_ENABLED(int pageseg_mode) {
-  return pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_AUTO;
-}
-inline bool PSM_SPARSE(int pageseg_mode) {
-  return pageseg_mode == PSM_SPARSE_TEXT || pageseg_mode == PSM_SPARSE_TEXT_OSD;
-}
-inline bool PSM_BLOCK_FIND_ENABLED(int pageseg_mode) {
-  return pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_SINGLE_COLUMN;
-}
-inline bool PSM_LINE_FIND_ENABLED(int pageseg_mode) {
-  return pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_SINGLE_BLOCK;
-}
-inline bool PSM_WORD_FIND_ENABLED(int pageseg_mode) {
-  return (pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_SINGLE_LINE) ||
-         pageseg_mode == PSM_SPARSE_TEXT || pageseg_mode == PSM_SPARSE_TEXT_OSD;
-}
-
-/**
- * enum of the elements of the page hierarchy, used in ResultIterator
- * to provide functions that operate on each level without having to
- * have 5x as many functions.
- */
-enum PageIteratorLevel {
-  RIL_BLOCK,    // Block of text/image/separator line.
-  RIL_PARA,     // Paragraph within a block.
-  RIL_TEXTLINE, // Line within a paragraph.
-  RIL_WORD,     // Word within a textline.
-  RIL_SYMBOL    // Symbol/character within a word.
-};
-
-/**
- * JUSTIFICATION_UNKNOWN
- *   The alignment is not clearly one of the other options.  This could happen
- *   for example if there are only one or two lines of text or the text looks
- *   like source code or poetry.
- *
- * NOTA BENE: Fully justified paragraphs (text aligned to both left and right
- *    margins) are marked by Tesseract with JUSTIFICATION_LEFT if their text
- *    is written with a left-to-right script and with JUSTIFICATION_RIGHT if
- *    their text is written in a right-to-left script.
- *
- * Interpretation for text read in vertical lines:
- *   "Left" is wherever the starting reading position is.
- *
- * JUSTIFICATION_LEFT
- *   Each line, except possibly the first, is flush to the same left tab stop.
- *
- * JUSTIFICATION_CENTER
- *   The text lines of the paragraph are centered about a line going
- *   down through their middle of the text lines.
- *
- * JUSTIFICATION_RIGHT
- *   Each line, except possibly the first, is flush to the same right tab stop.
- */
-enum ParagraphJustification {
-  JUSTIFICATION_UNKNOWN,
-  JUSTIFICATION_LEFT,
-  JUSTIFICATION_CENTER,
-  JUSTIFICATION_RIGHT,
-};
-
-/**
- * When Tesseract/Cube is initialized we can choose to instantiate/load/run
- * only the Tesseract part, only the Cube part or both along with the combiner.
- * The preference of which engine to use is stored in tessedit_ocr_engine_mode.
- *
- * ATTENTION: When modifying this enum, please make sure to make the
- * appropriate changes to all the enums mirroring it (e.g. OCREngine in
- * cityblock/workflow/detection/detection_storage.proto). Such enums will
- * mention the connection to OcrEngineMode in the comments.
- */
-enum OcrEngineMode {
-  OEM_TESSERACT_ONLY,          // Run Tesseract only - fastest; deprecated
-  OEM_LSTM_ONLY,               // Run just the LSTM line recognizer.
-  OEM_TESSERACT_LSTM_COMBINED, // Run the LSTM recognizer, but allow fallback
-                               // to Tesseract when things get difficult.
-                               // deprecated
-  OEM_DEFAULT,                 // Specify this mode when calling init_*(),
-                               // to indicate that any of the above modes
-                               // should be automatically inferred from the
-                               // variables in the language-specific config,
-                               // command-line configs, or if not specified
-                               // in any of the above should be set to the
-                               // default OEM_TESSERACT_ONLY.
-  OEM_COUNT                    // Number of OEMs
-};
-
-} // namespace tesseract.
-
-#endif // TESSERACT_CCSTRUCT_PUBLICTYPES_H_
--- a/third_party/ocr/tesseract-ocr/uos/aarch64/include/tesseract/renderer.h
+++ b/third_party/ocr/tesseract-ocr/uos/aarch64/include/tesseract/renderer.h
@ -1,311 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// File:        renderer.h
-// Description: Rendering interface to inject into TessBaseAPI
-//
-// (C) Copyright 2011, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef TESSERACT_API_RENDERER_H_
-#define TESSERACT_API_RENDERER_H_
-
-#include "export.h"
-
-// To avoid collision with other typenames include the ABSOLUTE MINIMUM
-// complexity of includes here. Use forward declarations wherever possible
-// and hide includes of complex types in baseapi.cpp.
-#include <cstdint>
-#include <string> // for std::string
-#include <vector> // for std::vector
-
-struct Pix;
-
-namespace tesseract {
-
-class TessBaseAPI;
-
-/**
- * Interface for rendering tesseract results into a document, such as text,
- * HOCR or pdf. This class is abstract. Specific classes handle individual
- * formats. This interface is then used to inject the renderer class into
- * tesseract when processing images.
- *
- * For simplicity implementing this with tesseract version 3.01,
- * the renderer contains document state that is cleared from document
- * to document just as the TessBaseAPI is. This way the base API can just
- * delegate its rendering functionality to injected renderers, and the
- * renderers can manage the associated state needed for the specific formats
- * in addition to the heuristics for producing it.
- */
-class TESS_API TessResultRenderer {
-public:
-  virtual ~TessResultRenderer();
-
-  // Takes ownership of pointer so must be new'd instance.
-  // Renderers aren't ordered, but appends the sequences of next parameter
-  // and existing next(). The renderers should be unique across both lists.
-  void insert(TessResultRenderer *next);
-
-  // Returns the next renderer or nullptr.
-  TessResultRenderer *next() {
-    return next_;
-  }
-
-  /**
-   * Starts a new document with the given title.
-   * This clears the contents of the output data.
-   * Title should use UTF-8 encoding.
-   */
-  bool BeginDocument(const char *title);
-
-  /**
-   * Adds the recognized text from the source image to the current document.
-   * Invalid if BeginDocument not yet called.
-   *
-   * Note that this API is a bit weird but is designed to fit into the
-   * current TessBaseAPI implementation where the api has lots of state
-   * information that we might want to add in.
-   */
-  bool AddImage(TessBaseAPI *api);
-
-  /**
-   * Finishes the document and finalizes the output data
-   * Invalid if BeginDocument not yet called.
-   */
-  bool EndDocument();
-
-  const char *file_extension() const {
-    return file_extension_;
-  }
-  const char *title() const {
-    return title_.c_str();
-  }
-
-  // Is everything fine? Otherwise something went wrong.
-  bool happy() const {
-    return happy_;
-  }
-
-  /**
-   * Returns the index of the last image given to AddImage
-   * (i.e. images are incremented whether the image succeeded or not)
-   *
-   * This is always defined. It means either the number of the
-   * current image, the last image ended, or in the completed document
-   * depending on when in the document lifecycle you are looking at it.
-   * Will return -1 if a document was never started.
-   */
-  int imagenum() const {
-    return imagenum_;
-  }
-
-protected:
-  /**
-   * Called by concrete classes.
-   *
-   * outputbase is the name of the output file excluding
-   * extension. For example, "/path/to/chocolate-chip-cookie-recipe"
-   *
-   * extension indicates the file extension to be used for output
-   * files. For example "pdf" will produce a .pdf file, and "hocr"
-   * will produce .hocr files.
-   */
-  TessResultRenderer(const char *outputbase, const char *extension);
-
-  // Hook for specialized handling in BeginDocument()
-  virtual bool BeginDocumentHandler();
-
-  // This must be overridden to render the OCR'd results
-  virtual bool AddImageHandler(TessBaseAPI *api) = 0;
-
-  // Hook for specialized handling in EndDocument()
-  virtual bool EndDocumentHandler();
-
-  // Renderers can call this to append '\0' terminated strings into
-  // the output string returned by GetOutput.
-  // This method will grow the output buffer if needed.
-  void AppendString(const char *s);
-
-  // Renderers can call this to append binary byte sequences into
-  // the output string returned by GetOutput. Note that s is not necessarily
-  // '\0' terminated (and can contain '\0' within it).
-  // This method will grow the output buffer if needed.
-  void AppendData(const char *s, int len);
-
-private:
-  TessResultRenderer *next_;   // Can link multiple renderers together
-  FILE *fout_;                 // output file pointer
-  const char *file_extension_; // standard extension for generated output
-  std::string title_;          // title of document being rendered
-  int imagenum_;               // index of last image added
-  bool happy_;                 // I get grumpy when the disk fills up, etc.
-};
-
-/**
- * Renders tesseract output into a plain UTF-8 text string
- */
-class TESS_API TessTextRenderer : public TessResultRenderer {
-public:
-  explicit TessTextRenderer(const char *outputbase);
-
-protected:
-  bool AddImageHandler(TessBaseAPI *api) override;
-};
-
-/**
- * Renders tesseract output into an hocr text string
- */
-class TESS_API TessHOcrRenderer : public TessResultRenderer {
-public:
-  explicit TessHOcrRenderer(const char *outputbase, bool font_info);
-  explicit TessHOcrRenderer(const char *outputbase);
-
-protected:
-  bool BeginDocumentHandler() override;
-  bool AddImageHandler(TessBaseAPI *api) override;
-  bool EndDocumentHandler() override;
-
-private:
-  bool font_info_; // whether to print font information
-};
-
-/**
- * Renders tesseract output into an alto text string
- */
-class TESS_API TessAltoRenderer : public TessResultRenderer {
-public:
-  explicit TessAltoRenderer(const char *outputbase);
-
-protected:
-  bool BeginDocumentHandler() override;
-  bool AddImageHandler(TessBaseAPI *api) override;
-  bool EndDocumentHandler() override;
-
-private:
-  bool begin_document;
-};
-
-/**
- * Renders Tesseract output into a TSV string
- */
-class TESS_API TessTsvRenderer : public TessResultRenderer {
-public:
-  explicit TessTsvRenderer(const char *outputbase, bool font_info);
-  explicit TessTsvRenderer(const char *outputbase);
-
-protected:
-  bool BeginDocumentHandler() override;
-  bool AddImageHandler(TessBaseAPI *api) override;
-  bool EndDocumentHandler() override;
-
-private:
-  bool font_info_; // whether to print font information
-};
-
-/**
- * Renders tesseract output into searchable PDF
- */
-class TESS_API TessPDFRenderer : public TessResultRenderer {
-public:
-  // datadir is the location of the TESSDATA. We need it because
-  // we load a custom PDF font from this location.
-  TessPDFRenderer(const char *outputbase, const char *datadir,
-                  bool textonly = false);
-
-protected:
-  bool BeginDocumentHandler() override;
-  bool AddImageHandler(TessBaseAPI *api) override;
-  bool EndDocumentHandler() override;
-
-private:
-  // We don't want to have every image in memory at once,
-  // so we store some metadata as we go along producing
-  // PDFs one page at a time. At the end, that metadata is
-  // used to make everything that isn't easily handled in a
-  // streaming fashion.
-  long int obj_;                  // counter for PDF objects
-  std::vector<uint64_t> offsets_; // offset of every PDF object in bytes
-  std::vector<long int> pages_;   // object number for every /Page object
-  std::string datadir_;           // where to find the custom font
-  bool textonly_;                 // skip images if set
-  // Bookkeeping only. DIY = Do It Yourself.
-  void AppendPDFObjectDIY(size_t objectsize);
-  // Bookkeeping + emit data.
-  void AppendPDFObject(const char *data);
-  // Create the /Contents object for an entire page.
-  char *GetPDFTextObjects(TessBaseAPI *api, double width, double height);
-  // Turn an image into a PDF object. Only transcode if we have to.
-  static bool imageToPDFObj(Pix *pix, const char *filename, long int objnum,
-                            char **pdf_object, long int *pdf_object_size,
-                            int jpg_quality);
-};
-
-/**
- * Renders tesseract output into a plain UTF-8 text string
- */
-class TESS_API TessUnlvRenderer : public TessResultRenderer {
-public:
-  explicit TessUnlvRenderer(const char *outputbase);
-
-protected:
-  bool AddImageHandler(TessBaseAPI *api) override;
-};
-
-/**
- * Renders tesseract output into a plain UTF-8 text string for LSTMBox
- */
-class TESS_API TessLSTMBoxRenderer : public TessResultRenderer {
-public:
-  explicit TessLSTMBoxRenderer(const char *outputbase);
-
-protected:
-  bool AddImageHandler(TessBaseAPI *api) override;
-};
-
-/**
- * Renders tesseract output into a plain UTF-8 text string
- */
-class TESS_API TessBoxTextRenderer : public TessResultRenderer {
-public:
-  explicit TessBoxTextRenderer(const char *outputbase);
-
-protected:
-  bool AddImageHandler(TessBaseAPI *api) override;
-};
-
-/**
- * Renders tesseract output into a plain UTF-8 text string in WordStr format
- */
-class TESS_API TessWordStrBoxRenderer : public TessResultRenderer {
-public:
-  explicit TessWordStrBoxRenderer(const char *outputbase);
-
-protected:
-  bool AddImageHandler(TessBaseAPI *api) override;
-};
-
-#ifndef DISABLED_LEGACY_ENGINE
-
-/**
- * Renders tesseract output into an osd text string
- */
-class TESS_API TessOsdRenderer : public TessResultRenderer {
-public:
-  explicit TessOsdRenderer(const char *outputbase);
-
-protected:
-  bool AddImageHandler(TessBaseAPI *api) override;
-};
-
-#endif // ndef DISABLED_LEGACY_ENGINE
-
-} // namespace tesseract.
-
-#endif // TESSERACT_API_RENDERER_H_
--- a/third_party/ocr/tesseract-ocr/uos/aarch64/include/tesseract/resultiterator.h
+++ b/third_party/ocr/tesseract-ocr/uos/aarch64/include/tesseract/resultiterator.h
@ -1,250 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// File:        resultiterator.h
-// Description: Iterator for tesseract results that is capable of
-//              iterating in proper reading order over Bi Directional
-//              (e.g. mixed Hebrew and English) text.
-// Author:      David Eger
-//
-// (C) Copyright 2011, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef TESSERACT_CCMAIN_RESULT_ITERATOR_H_
-#define TESSERACT_CCMAIN_RESULT_ITERATOR_H_
-
-#include "export.h"            // for TESS_API, TESS_LOCAL
-#include "ltrresultiterator.h" // for LTRResultIterator
-#include "publictypes.h"       // for PageIteratorLevel
-#include "unichar.h"           // for StrongScriptDirection
-
-#include <set>    // for std::pair
-#include <vector> // for std::vector
-
-namespace tesseract {
-
-class TESS_API ResultIterator : public LTRResultIterator {
-public:
-  static ResultIterator *StartOfParagraph(const LTRResultIterator &resit);
-
-  /**
-   * ResultIterator is copy constructible!
-   * The default copy constructor works just fine for us.
-   */
-  ~ResultIterator() override = default;
-
-  // ============= Moving around within the page ============.
-  /**
-   * Moves the iterator to point to the start of the page to begin
-   * an iteration.
-   */
-  void Begin() override;
-
-  /**
-   * Moves to the start of the next object at the given level in the
-   * page hierarchy in the appropriate reading order and returns false if
-   * the end of the page was reached.
-   * NOTE that RIL_SYMBOL will skip non-text blocks, but all other
-   * PageIteratorLevel level values will visit each non-text block once.
-   * Think of non text blocks as containing a single para, with a single line,
-   * with a single imaginary word.
-   * Calls to Next with different levels may be freely intermixed.
-   * This function iterates words in right-to-left scripts correctly, if
-   * the appropriate language has been loaded into Tesseract.
-   */
-  bool Next(PageIteratorLevel level) override;
-
-  /**
-   * IsAtBeginningOf() returns whether we're at the logical beginning of the
-   * given level.  (as opposed to ResultIterator's left-to-right top-to-bottom
-   * order).  Otherwise, this acts the same as PageIterator::IsAtBeginningOf().
-   * For a full description, see pageiterator.h
-   */
-  bool IsAtBeginningOf(PageIteratorLevel level) const override;
-
-  /**
-   * Implement PageIterator's IsAtFinalElement correctly in a BiDi context.
-   * For instance, IsAtFinalElement(RIL_PARA, RIL_WORD) returns whether we
-   * point at the last word in a paragraph.  See PageIterator for full comment.
-   */
-  bool IsAtFinalElement(PageIteratorLevel level,
-                        PageIteratorLevel element) const override;
-
-  // ============= Functions that refer to words only ============.
-  // Returns the number of blanks before the current word.
-  int BlanksBeforeWord() const;
-
-  // ============= Accessing data ==============.
-
-  /**
-   * Returns the null terminated UTF-8 encoded text string for the current
-   * object at the given level. Use delete [] to free after use.
-   */
-  virtual char *GetUTF8Text(PageIteratorLevel level) const;
-
-  /**
-   * Returns the LSTM choices for every LSTM timestep for the current word.
-   */
-  virtual std::vector<std::vector<std::vector<std::pair<const char *, float>>>>
-      *GetRawLSTMTimesteps() const;
-  virtual std::vector<std::vector<std::pair<const char *, float>>>
-      *GetBestLSTMSymbolChoices() const;
-
-  /**
-   * Return whether the current paragraph's dominant reading direction
-   * is left-to-right (as opposed to right-to-left).
-   */
-  bool ParagraphIsLtr() const;
-
-  // ============= Exposed only for testing =============.
-
-  /**
-   * Yields the reading order as a sequence of indices and (optional)
-   * meta-marks for a set of words (given left-to-right).
-   * The meta marks are passed as negative values:
-   *   kMinorRunStart  Start of minor direction text.
-   *   kMinorRunEnd    End of minor direction text.
-   *   kComplexWord    The next indexed word contains both left-to-right and
-   *                    right-to-left characters and was treated as neutral.
-   *
-   * For example, suppose we have five words in a text line,
-   * indexed [0,1,2,3,4] from the leftmost side of the text line.
-   * The following are all believable reading_orders:
-   *
-   * Left-to-Right (in ltr paragraph):
-   *     { 0, 1, 2, 3, 4 }
-   * Left-to-Right (in rtl paragraph):
-   *     { kMinorRunStart, 0, 1, 2, 3, 4, kMinorRunEnd }
-   * Right-to-Left (in rtl paragraph):
-   *     { 4, 3, 2, 1, 0 }
-   * Left-to-Right except for an RTL phrase in words 2, 3 in an ltr paragraph:
-   *     { 0, 1, kMinorRunStart, 3, 2, kMinorRunEnd, 4 }
-   */
-  static void CalculateTextlineOrder(
-      bool paragraph_is_ltr,
-      const std::vector<StrongScriptDirection> &word_dirs,
-      std::vector<int> *reading_order);
-
-  static const int kMinorRunStart;
-  static const int kMinorRunEnd;
-  static const int kComplexWord;
-
-protected:
-  /**
-   * We presume the data associated with the given iterator will outlive us.
-   * NB: This is private because it does something that is non-obvious:
-   *   it resets to the beginning of the paragraph instead of staying wherever
-   *   resit might have pointed.
-   */
-  explicit ResultIterator(const LTRResultIterator &resit);
-
-private:
-  /**
-   * Calculates the current paragraph's dominant writing direction.
-   * Typically, members should use current_paragraph_ltr_ instead.
-   */
-  bool CurrentParagraphIsLtr() const;
-
-  /**
-   * Returns word indices as measured from resit->RestartRow() = index 0
-   * for the reading order of words within a textline given an iterator
-   * into the middle of the text line.
-   * In addition to non-negative word indices, the following negative values
-   * may be inserted:
-   *   kMinorRunStart  Start of minor direction text.
-   *   kMinorRunEnd    End of minor direction text.
-   *   kComplexWord    The previous word contains both left-to-right and
-   *                   right-to-left characters and was treated as neutral.
-   */
-  void CalculateTextlineOrder(bool paragraph_is_ltr,
-                              const LTRResultIterator &resit,
-                              std::vector<int> *indices) const;
-  /** Same as above, but the caller's ssd gets filled in if ssd != nullptr. */
-  void CalculateTextlineOrder(bool paragraph_is_ltr,
-                              const LTRResultIterator &resit,
-                              std::vector<StrongScriptDirection> *ssd,
-                              std::vector<int> *indices) const;
-
-  /**
-   * What is the index of the current word in a strict left-to-right reading
-   * of the row?
-   */
-  int LTRWordIndex() const;
-
-  /**
-   * Given an iterator pointing at a word, returns the logical reading order
-   * of blob indices for the word.
-   */
-  void CalculateBlobOrder(std::vector<int> *blob_indices) const;
-
-  /** Precondition: current_paragraph_is_ltr_ is set. */
-  void MoveToLogicalStartOfTextline();
-
-  /**
-   * Precondition: current_paragraph_is_ltr_ and in_minor_direction_
-   * are set.
-   */
-  void MoveToLogicalStartOfWord();
-
-  /** Are we pointing at the final (reading order) symbol of the word? */
-  bool IsAtFinalSymbolOfWord() const;
-
-  /** Are we pointing at the first (reading order) symbol of the word? */
-  bool IsAtFirstSymbolOfWord() const;
-
-  /**
-   * Append any extra marks that should be appended to this word when printed.
-   * Mostly, these are Unicode BiDi control characters.
-   */
-  void AppendSuffixMarks(std::string *text) const;
-
-  /** Appends the current word in reading order to the given buffer.*/
-  void AppendUTF8WordText(std::string *text) const;
-
-  /**
-   * Appends the text of the current text line, *assuming this iterator is
-   * positioned at the beginning of the text line*  This function
-   * updates the iterator to point to the first position past the text line.
-   * Each textline is terminated in a single newline character.
-   * If the textline ends a paragraph, it gets a second terminal newline.
-   */
-  void IterateAndAppendUTF8TextlineText(std::string *text);
-
-  /**
-   * Appends the text of the current paragraph in reading order
-   * to the given buffer.
-   * Each textline is terminated in a single newline character, and the
-   * paragraph gets an extra newline at the end.
-   */
-  void AppendUTF8ParagraphText(std::string *text) const;
-
-  /** Returns whether the bidi_debug flag is set to at least min_level. */
-  bool BidiDebug(int min_level) const;
-
-  bool current_paragraph_is_ltr_;
-
-  /**
-   * Is the currently pointed-at character at the beginning of
-   * a minor-direction run?
-   */
-  bool at_beginning_of_minor_run_;
-
-  /** Is the currently pointed-at character in a minor-direction sequence? */
-  bool in_minor_direction_;
-
-  /**
-   * Should detected inter-word spaces be preserved, or "compressed" to a single
-   * space character (default behavior).
-   */
-  bool preserve_interword_spaces_;
-};
-
-} // namespace tesseract.
-
-#endif // TESSERACT_CCMAIN_RESULT_ITERATOR_H_
--- a/third_party/ocr/tesseract-ocr/uos/aarch64/include/tesseract/unichar.h
+++ b/third_party/ocr/tesseract-ocr/uos/aarch64/include/tesseract/unichar.h
@ -1,174 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// File:        unichar.h
-// Description: Unicode character/ligature class.
-// Author:      Ray Smith
-//
-// (C) Copyright 2006, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef TESSERACT_CCUTIL_UNICHAR_H_
-#define TESSERACT_CCUTIL_UNICHAR_H_
-
-#include "export.h"
-
-#include <memory.h>
-#include <cstring>
-#include <string>
-#include <vector>
-
-namespace tesseract {
-
-// Maximum number of characters that can be stored in a UNICHAR. Must be
-// at least 4. Must not exceed 31 without changing the coding of length.
-#define UNICHAR_LEN 30
-
-// A UNICHAR_ID is the unique id of a unichar.
-using UNICHAR_ID = int;
-
-// A variable to indicate an invalid or uninitialized unichar id.
-static const int INVALID_UNICHAR_ID = -1;
-// A special unichar that corresponds to INVALID_UNICHAR_ID.
-static const char INVALID_UNICHAR[] = "__INVALID_UNICHAR__";
-
-enum StrongScriptDirection {
-  DIR_NEUTRAL = 0,       // Text contains only neutral characters.
-  DIR_LEFT_TO_RIGHT = 1, // Text contains no Right-to-Left characters.
-  DIR_RIGHT_TO_LEFT = 2, // Text contains no Left-to-Right characters.
-  DIR_MIX = 3,           // Text contains a mixture of left-to-right
-                         // and right-to-left characters.
-};
-
-using char32 = signed int;
-
-// The UNICHAR class holds a single classification result. This may be
-// a single Unicode character (stored as between 1 and 4 utf8 bytes) or
-// multiple Unicode characters representing the NFKC expansion of a ligature
-// such as fi, ffl etc. These are also stored as utf8.
-class TESS_API UNICHAR {
-public:
-  UNICHAR() {
-    memset(chars, 0, UNICHAR_LEN);
-  }
-
-  // Construct from a utf8 string. If len<0 then the string is null terminated.
-  // If the string is too long to fit in the UNICHAR then it takes only what
-  // will fit.
-  UNICHAR(const char *utf8_str, int len);
-
-  // Construct from a single UCS4 character.
-  explicit UNICHAR(int unicode);
-
-  // Default copy constructor and operator= are OK.
-
-  // Get the first character as UCS-4.
-  int first_uni() const;
-
-  // Get the length of the UTF8 string.
-  int utf8_len() const {
-    int len = chars[UNICHAR_LEN - 1];
-    return len >= 0 && len < UNICHAR_LEN ? len : UNICHAR_LEN;
-  }
-
-  // Get a UTF8 string, but NOT nullptr terminated.
-  const char *utf8() const {
-    return chars;
-  }
-
-  // Get a terminated UTF8 string: Must delete[] it after use.
-  char *utf8_str() const;
-
-  // Get the number of bytes in the first character of the given utf8 string.
-  static int utf8_step(const char *utf8_str);
-
-  // A class to simplify iterating over and accessing elements of a UTF8
-  // string. Note that unlike the UNICHAR class, const_iterator does NOT COPY or
-  // take ownership of the underlying byte array. It also does not permit
-  // modification of the array (as the name suggests).
-  //
-  // Example:
-  //   for (UNICHAR::const_iterator it = UNICHAR::begin(str, str_len);
-  //        it != UNICHAR::end(str, len);
-  //        ++it) {
-  //     printf("UCS-4 symbol code = %d\n", *it);
-  //     char buf[5];
-  //     int char_len = it.get_utf8(buf); buf[char_len] = '\0';
-  //     printf("Char = %s\n", buf);
-  //   }
-  class TESS_API const_iterator {
-    using CI = const_iterator;
-
-  public:
-    // Step to the next UTF8 character.
-    // If the current position is at an illegal UTF8 character, then print an
-    // error message and step by one byte. If the current position is at a
-    // nullptr value, don't step past it.
-    const_iterator &operator++();
-
-    // Return the UCS-4 value at the current position.
-    // If the current position is at an illegal UTF8 value, return a single
-    // space character.
-    int operator*() const;
-
-    // Store the UTF-8 encoding of the current codepoint into buf, which must be
-    // at least 4 bytes long. Return the number of bytes written.
-    // If the current position is at an illegal UTF8 value, writes a single
-    // space character and returns 1.
-    // Note that this method does not null-terminate the buffer.
-    int get_utf8(char *buf) const;
-    // Returns the number of bytes of the current codepoint. Returns 1 if the
-    // current position is at an illegal UTF8 value.
-    int utf8_len() const;
-    // Returns true if the UTF-8 encoding at the current position is legal.
-    bool is_legal() const;
-
-    // Return the pointer into the string at the current position.
-    const char *utf8_data() const {
-      return it_;
-    }
-
-    // Iterator equality operators.
-    friend bool operator==(const CI &lhs, const CI &rhs) {
-      return lhs.it_ == rhs.it_;
-    }
-    friend bool operator!=(const CI &lhs, const CI &rhs) {
-      return !(lhs == rhs);
-    }
-
-  private:
-    friend class UNICHAR;
-    explicit const_iterator(const char *it) : it_(it) {}
-
-    const char *it_; // Pointer into the string.
-  };
-
-  // Create a start/end iterator pointing to a string. Note that these methods
-  // are static and do NOT create a copy or take ownership of the underlying
-  // array.
-  static const_iterator begin(const char *utf8_str, int byte_length);
-  static const_iterator end(const char *utf8_str, int byte_length);
-
-  // Converts a utf-8 string to a vector of unicodes.
-  // Returns an empty vector if the input contains invalid UTF-8.
-  static std::vector<char32> UTF8ToUTF32(const char *utf8_str);
-  // Converts a vector of unicodes to a utf8 string.
-  // Returns an empty string if the input contains an invalid unicode.
-  static std::string UTF32ToUTF8(const std::vector<char32> &str32);
-
-private:
-  // A UTF-8 representation of 1 or more Unicode characters.
-  // The last element (chars[UNICHAR_LEN - 1]) is a length if
-  // its value < UNICHAR_LEN, otherwise it is a genuine character.
-  char chars[UNICHAR_LEN]{};
-};
-
-} // namespace tesseract
-
-#endif // TESSERACT_CCUTIL_UNICHAR_H_
--- a/third_party/ocr/tesseract-ocr/uos/aarch64/include/tesseract/version.h
+++ b/third_party/ocr/tesseract-ocr/uos/aarch64/include/tesseract/version.h
@ -1,34 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// File:        version.h
-// Description: Version information
-//
-// (C) Copyright 2018, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef TESSERACT_API_VERSION_H_
-#define TESSERACT_API_VERSION_H_
-
-// clang-format off
-
-#define TESSERACT_MAJOR_VERSION @GENERIC_MAJOR_VERSION@
-#define TESSERACT_MINOR_VERSION @GENERIC_MINOR_VERSION@
-#define TESSERACT_MICRO_VERSION @GENERIC_MICRO_VERSION@
-
-#define TESSERACT_VERSION          \
-  (TESSERACT_MAJOR_VERSION << 16 | \
-   TESSERACT_MINOR_VERSION <<  8 | \
-   TESSERACT_MICRO_VERSION)
-
-#define TESSERACT_VERSION_STR "@PACKAGE_VERSION@"
-
-// clang-format on
-
-#endif // TESSERACT_API_VERSION_H_
--- a/third_party/ocr/tesseract-ocr/uos/loongarch64/include/tesseract/baseapi.h
+++ b/third_party/ocr/tesseract-ocr/uos/loongarch64/include/tesseract/baseapi.h
@ -1,812 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// File:        baseapi.h
-// Description: Simple API for calling tesseract.
-// Author:      Ray Smith
-//
-// (C) Copyright 2006, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef TESSERACT_API_BASEAPI_H_
-#define TESSERACT_API_BASEAPI_H_
-
-#ifdef HAVE_CONFIG_H
-#  include "config_auto.h" // DISABLED_LEGACY_ENGINE
-#endif
-
-#include "export.h"
-#include "pageiterator.h"
-#include "publictypes.h"
-#include "resultiterator.h"
-#include "unichar.h"
-
-#include "version.h"
-
-#include <cstdio>
-#include <vector> // for std::vector
-
-struct Pix;
-struct Pixa;
-struct Boxa;
-
-namespace tesseract {
-
-class PAGE_RES;
-class ParagraphModel;
-class BLOCK_LIST;
-class ETEXT_DESC;
-struct OSResults;
-class UNICHARSET;
-
-class Dawg;
-class Dict;
-class EquationDetect;
-class PageIterator;
-class ImageThresholder;
-class LTRResultIterator;
-class ResultIterator;
-class MutableIterator;
-class TessResultRenderer;
-class Tesseract;
-
-// Function to read a std::vector<char> from a whole file.
-// Returns false on failure.
-using FileReader = bool (*)(const char *filename, std::vector<char> *data);
-
-using DictFunc = int (Dict::*)(void *, const UNICHARSET &, UNICHAR_ID,
-                               bool) const;
-using ProbabilityInContextFunc = double (Dict::*)(const char *, const char *,
-                                                  int, const char *, int);
-
-/**
- * Base class for all tesseract APIs.
- * Specific classes can add ability to work on different inputs or produce
- * different outputs.
- * This class is mostly an interface layer on top of the Tesseract instance
- * class to hide the data types so that users of this class don't have to
- * include any other Tesseract headers.
- */
-class TESS_API TessBaseAPI {
-public:
-  TessBaseAPI();
-  virtual ~TessBaseAPI();
-  // Copy constructor and assignment operator are currently unsupported.
-  TessBaseAPI(TessBaseAPI const &) = delete;
-  TessBaseAPI &operator=(TessBaseAPI const &) = delete;
-
-  /**
-   * Returns the version identifier as a static string. Do not delete.
-   */
-  static const char *Version();
-
-  /**
-   * If compiled with OpenCL AND an available OpenCL
-   * device is deemed faster than serial code, then
-   * "device" is populated with the cl_device_id
-   * and returns sizeof(cl_device_id)
-   * otherwise *device=nullptr and returns 0.
-   */
-  static size_t getOpenCLDevice(void **device);
-
-  /**
-   * Set the name of the input file. Needed for training and
-   * reading a UNLV zone file, and for searchable PDF output.
-   */
-  void SetInputName(const char *name);
-  /**
-   * These functions are required for searchable PDF output.
-   * We need our hands on the input file so that we can include
-   * it in the PDF without transcoding. If that is not possible,
-   * we need the original image. Finally, resolution metadata
-   * is stored in the PDF so we need that as well.
-   */
-  const char *GetInputName();
-  // Takes ownership of the input pix.
-  void SetInputImage(Pix *pix);
-  Pix *GetInputImage();
-  int GetSourceYResolution();
-  const char *GetDatapath();
-
-  /** Set the name of the bonus output files. Needed only for debugging. */
-  void SetOutputName(const char *name);
-
-  /**
-   * Set the value of an internal "parameter."
-   * Supply the name of the parameter and the value as a string, just as
-   * you would in a config file.
-   * Returns false if the name lookup failed.
-   * Eg SetVariable("tessedit_char_blacklist", "xyz"); to ignore x, y and z.
-   * Or SetVariable("classify_bln_numeric_mode", "1"); to set numeric-only mode.
-   * SetVariable may be used before Init, but settings will revert to
-   * defaults on End().
-   *
-   * Note: Must be called after Init(). Only works for non-init variables
-   * (init variables should be passed to Init()).
-   */
-  bool SetVariable(const char *name, const char *value);
-  bool SetDebugVariable(const char *name, const char *value);
-
-  /**
-   * Returns true if the parameter was found among Tesseract parameters.
-   * Fills in value with the value of the parameter.
-   */
-  bool GetIntVariable(const char *name, int *value) const;
-  bool GetBoolVariable(const char *name, bool *value) const;
-  bool GetDoubleVariable(const char *name, double *value) const;
-
-  /**
-   * Returns the pointer to the string that represents the value of the
-   * parameter if it was found among Tesseract parameters.
-   */
-  const char *GetStringVariable(const char *name) const;
-
-#ifndef DISABLED_LEGACY_ENGINE
-
-  /**
-   * Print Tesseract fonts table to the given file.
-   */
-  void PrintFontsTable(FILE *fp) const;
-
-#endif
-
-  /**
-   * Print Tesseract parameters to the given file.
-   */
-  void PrintVariables(FILE *fp) const;
-
-  /**
-   * Get value of named variable as a string, if it exists.
-   */
-  bool GetVariableAsString(const char *name, std::string *val) const;
-
-  /**
-   * Instances are now mostly thread-safe and totally independent,
-   * but some global parameters remain. Basically it is safe to use multiple
-   * TessBaseAPIs in different threads in parallel, UNLESS:
-   * you use SetVariable on some of the Params in classify and textord.
-   * If you do, then the effect will be to change it for all your instances.
-   *
-   * Start tesseract. Returns zero on success and -1 on failure.
-   * NOTE that the only members that may be called before Init are those
-   * listed above here in the class definition.
-   *
-   * The datapath must be the name of the tessdata directory.
-   * The language is (usually) an ISO 639-3 string or nullptr will default to
-   * eng. It is entirely safe (and eventually will be efficient too) to call
-   * Init multiple times on the same instance to change language, or just
-   * to reset the classifier.
-   * The language may be a string of the form [~]<lang>[+[~]<lang>]* indicating
-   * that multiple languages are to be loaded. Eg hin+eng will load Hindi and
-   * English. Languages may specify internally that they want to be loaded
-   * with one or more other languages, so the ~ sign is available to override
-   * that. Eg if hin were set to load eng by default, then hin+~eng would force
-   * loading only hin. The number of loaded languages is limited only by
-   * memory, with the caveat that loading additional languages will impact
-   * both speed and accuracy, as there is more work to do to decide on the
-   * applicable language, and there is more chance of hallucinating incorrect
-   * words.
-   * WARNING: On changing languages, all Tesseract parameters are reset
-   * back to their default values. (Which may vary between languages.)
-   * If you have a rare need to set a Variable that controls
-   * initialization for a second call to Init you should explicitly
-   * call End() and then use SetVariable before Init. This is only a very
-   * rare use case, since there are very few uses that require any parameters
-   * to be set before Init.
-   *
-   * If set_only_non_debug_params is true, only params that do not contain
-   * "debug" in the name will be set.
-   */
-  int Init(const char *datapath, const char *language, OcrEngineMode mode,
-           char **configs, int configs_size,
-           const std::vector<std::string> *vars_vec,
-           const std::vector<std::string> *vars_values,
-           bool set_only_non_debug_params);
-  int Init(const char *datapath, const char *language, OcrEngineMode oem) {
-    return Init(datapath, language, oem, nullptr, 0, nullptr, nullptr, false);
-  }
-  int Init(const char *datapath, const char *language) {
-    return Init(datapath, language, OEM_DEFAULT, nullptr, 0, nullptr, nullptr,
-                false);
-  }
-  // In-memory version reads the traineddata file directly from the given
-  // data[data_size] array, and/or reads data via a FileReader.
-  int Init(const char *data, int data_size, const char *language,
-           OcrEngineMode mode, char **configs, int configs_size,
-           const std::vector<std::string> *vars_vec,
-           const std::vector<std::string> *vars_values,
-           bool set_only_non_debug_params, FileReader reader);
-
-  /**
-   * Returns the languages string used in the last valid initialization.
-   * If the last initialization specified "deu+hin" then that will be
-   * returned. If hin loaded eng automatically as well, then that will
-   * not be included in this list. To find the languages actually
-   * loaded use GetLoadedLanguagesAsVector.
-   * The returned string should NOT be deleted.
-   */
-  const char *GetInitLanguagesAsString() const;
-
-  /**
-   * Returns the loaded languages in the vector of std::string.
-   * Includes all languages loaded by the last Init, including those loaded
-   * as dependencies of other loaded languages.
-   */
-  void GetLoadedLanguagesAsVector(std::vector<std::string> *langs) const;
-
-  /**
-   * Returns the available languages in the sorted vector of std::string.
-   */
-  void GetAvailableLanguagesAsVector(std::vector<std::string> *langs) const;
-
-  /**
-   * Init only for page layout analysis. Use only for calls to SetImage and
-   * AnalysePage. Calls that attempt recognition will generate an error.
-   */
-  void InitForAnalysePage();
-
-  /**
-   * Read a "config" file containing a set of param, value pairs.
-   * Searches the standard places: tessdata/configs, tessdata/tessconfigs
-   * and also accepts a relative or absolute path name.
-   * Note: only non-init params will be set (init params are set by Init()).
-   */
-  void ReadConfigFile(const char *filename);
-  /** Same as above, but only set debug params from the given config file. */
-  void ReadDebugConfigFile(const char *filename);
-
-  /**
-   * Set the current page segmentation mode. Defaults to PSM_SINGLE_BLOCK.
-   * The mode is stored as an IntParam so it can also be modified by
-   * ReadConfigFile or SetVariable("tessedit_pageseg_mode", mode as string).
-   */
-  void SetPageSegMode(PageSegMode mode);
-
-  /** Return the current page segmentation mode. */
-  PageSegMode GetPageSegMode() const;
-
-  /**
-   * Recognize a rectangle from an image and return the result as a string.
-   * May be called many times for a single Init.
-   * Currently has no error checking.
-   * Greyscale of 8 and color of 24 or 32 bits per pixel may be given.
-   * Palette color images will not work properly and must be converted to
-   * 24 bit.
-   * Binary images of 1 bit per pixel may also be given but they must be
-   * byte packed with the MSB of the first byte being the first pixel, and a
-   * 1 represents WHITE. For binary images set bytes_per_pixel=0.
-   * The recognized text is returned as a char* which is coded
-   * as UTF8 and must be freed with the delete [] operator.
-   *
-   * Note that TesseractRect is the simplified convenience interface.
-   * For advanced uses, use SetImage, (optionally) SetRectangle, Recognize,
-   * and one or more of the Get*Text functions below.
-   */
-  char *TesseractRect(const unsigned char *imagedata, int bytes_per_pixel,
-                      int bytes_per_line, int left, int top, int width,
-                      int height);
-
-  /**
-   * Call between pages or documents etc to free up memory and forget
-   * adaptive data.
-   */
-  void ClearAdaptiveClassifier();
-
-  /**
-   * @defgroup AdvancedAPI Advanced API
-   * The following methods break TesseractRect into pieces, so you can
-   * get hold of the thresholded image, get the text in different formats,
-   * get bounding boxes, confidences etc.
-   */
-  /* @{ */
-
-  /**
-   * Provide an image for Tesseract to recognize. Format is as
-   * TesseractRect above. Copies the image buffer and converts to Pix.
-   * SetImage clears all recognition results, and sets the rectangle to the
-   * full image, so it may be followed immediately by a GetUTF8Text, and it
-   * will automatically perform recognition.
-   */
-  void SetImage(const unsigned char *imagedata, int width, int height,
-                int bytes_per_pixel, int bytes_per_line);
-
-  /**
-   * Provide an image for Tesseract to recognize. As with SetImage above,
-   * Tesseract takes its own copy of the image, so it need not persist until
-   * after Recognize.
-   * Pix vs raw, which to use?
-   * Use Pix where possible. Tesseract uses Pix as its internal representation
-   * and it is therefore more efficient to provide a Pix directly.
-   */
-  void SetImage(Pix *pix);
-
-  /**
-   * Set the resolution of the source image in pixels per inch so font size
-   * information can be calculated in results.  Call this after SetImage().
-   */
-  void SetSourceResolution(int ppi);
-
-  /**
-   * Restrict recognition to a sub-rectangle of the image. Call after SetImage.
-   * Each SetRectangle clears the recogntion results so multiple rectangles
-   * can be recognized with the same image.
-   */
-  void SetRectangle(int left, int top, int width, int height);
-
-  /**
-   * Get a copy of the internal thresholded image from Tesseract.
-   * Caller takes ownership of the Pix and must pixDestroy it.
-   * May be called any time after SetImage, or after TesseractRect.
-   */
-  Pix *GetThresholdedImage();
-
-  /**
-   * Get the result of page layout analysis as a leptonica-style
-   * Boxa, Pixa pair, in reading order.
-   * Can be called before or after Recognize.
-   */
-  Boxa *GetRegions(Pixa **pixa);
-
-  /**
-   * Get the textlines as a leptonica-style
-   * Boxa, Pixa pair, in reading order.
-   * Can be called before or after Recognize.
-   * If raw_image is true, then extract from the original image instead of the
-   * thresholded image and pad by raw_padding pixels.
-   * If blockids is not nullptr, the block-id of each line is also returned as
-   * an array of one element per line. delete [] after use. If paraids is not
-   * nullptr, the paragraph-id of each line within its block is also returned as
-   * an array of one element per line. delete [] after use.
-   */
-  Boxa *GetTextlines(bool raw_image, int raw_padding, Pixa **pixa,
-                     int **blockids, int **paraids);
-  /*
-   Helper method to extract from the thresholded image. (most common usage)
-*/
-  Boxa *GetTextlines(Pixa **pixa, int **blockids) {
-    return GetTextlines(false, 0, pixa, blockids, nullptr);
-  }
-
-  /**
-   * Get textlines and strips of image regions as a leptonica-style Boxa, Pixa
-   * pair, in reading order. Enables downstream handling of non-rectangular
-   * regions.
-   * Can be called before or after Recognize.
-   * If blockids is not nullptr, the block-id of each line is also returned as
-   * an array of one element per line. delete [] after use.
-   */
-  Boxa *GetStrips(Pixa **pixa, int **blockids);
-
-  /**
-   * Get the words as a leptonica-style
-   * Boxa, Pixa pair, in reading order.
-   * Can be called before or after Recognize.
-   */
-  Boxa *GetWords(Pixa **pixa);
-
-  /**
-   * Gets the individual connected (text) components (created
-   * after pages segmentation step, but before recognition)
-   * as a leptonica-style Boxa, Pixa pair, in reading order.
-   * Can be called before or after Recognize.
-   * Note: the caller is responsible for calling boxaDestroy()
-   * on the returned Boxa array and pixaDestroy() on cc array.
-   */
-  Boxa *GetConnectedComponents(Pixa **cc);
-
-  /**
-   * Get the given level kind of components (block, textline, word etc.) as a
-   * leptonica-style Boxa, Pixa pair, in reading order.
-   * Can be called before or after Recognize.
-   * If blockids is not nullptr, the block-id of each component is also returned
-   * as an array of one element per component. delete [] after use.
-   * If blockids is not nullptr, the paragraph-id of each component with its
-   * block is also returned as an array of one element per component. delete []
-   * after use. If raw_image is true, then portions of the original image are
-   * extracted instead of the thresholded image and padded with raw_padding. If
-   * text_only is true, then only text components are returned.
-   */
-  Boxa *GetComponentImages(PageIteratorLevel level, bool text_only,
-                           bool raw_image, int raw_padding, Pixa **pixa,
-                           int **blockids, int **paraids);
-  // Helper function to get binary images with no padding (most common usage).
-  Boxa *GetComponentImages(const PageIteratorLevel level, const bool text_only,
-                           Pixa **pixa, int **blockids) {
-    return GetComponentImages(level, text_only, false, 0, pixa, blockids,
-                              nullptr);
-  }
-
-  /**
-   * Returns the scale factor of the thresholded image that would be returned by
-   * GetThresholdedImage() and the various GetX() methods that call
-   * GetComponentImages().
-   * Returns 0 if no thresholder has been set.
-   */
-  int GetThresholdedImageScaleFactor() const;
-
-  /**
-   * Runs page layout analysis in the mode set by SetPageSegMode.
-   * May optionally be called prior to Recognize to get access to just
-   * the page layout results. Returns an iterator to the results.
-   * If merge_similar_words is true, words are combined where suitable for use
-   * with a line recognizer. Use if you want to use AnalyseLayout to find the
-   * textlines, and then want to process textline fragments with an external
-   * line recognizer.
-   * Returns nullptr on error or an empty page.
-   * The returned iterator must be deleted after use.
-   * WARNING! This class points to data held within the TessBaseAPI class, and
-   * therefore can only be used while the TessBaseAPI class still exists and
-   * has not been subjected to a call of Init, SetImage, Recognize, Clear, End
-   * DetectOS, or anything else that changes the internal PAGE_RES.
-   */
-  PageIterator *AnalyseLayout();
-  PageIterator *AnalyseLayout(bool merge_similar_words);
-
-  /**
-   * Recognize the image from SetAndThresholdImage, generating Tesseract
-   * internal structures. Returns 0 on success.
-   * Optional. The Get*Text functions below will call Recognize if needed.
-   * After Recognize, the output is kept internally until the next SetImage.
-   */
-  int Recognize(ETEXT_DESC *monitor);
-
-  /**
-   * Methods to retrieve information after SetAndThresholdImage(),
-   * Recognize() or TesseractRect(). (Recognize is called implicitly if needed.)
-   */
-
-  /**
-   * Turns images into symbolic text.
-   *
-   * filename can point to a single image, a multi-page TIFF,
-   * or a plain text list of image filenames.
-   *
-   * retry_config is useful for debugging. If not nullptr, you can fall
-   * back to an alternate configuration if a page fails for some
-   * reason.
-   *
-   * timeout_millisec terminates processing if any single page
-   * takes too long. Set to 0 for unlimited time.
-   *
-   * renderer is responible for creating the output. For example,
-   * use the TessTextRenderer if you want plaintext output, or
-   * the TessPDFRender to produce searchable PDF.
-   *
-   * If tessedit_page_number is non-negative, will only process that
-   * single page. Works for multi-page tiff file, or filelist.
-   *
-   * Returns true if successful, false on error.
-   */
-  bool ProcessPages(const char *filename, const char *retry_config,
-                    int timeout_millisec, TessResultRenderer *renderer);
-  // Does the real work of ProcessPages.
-  bool ProcessPagesInternal(const char *filename, const char *retry_config,
-                            int timeout_millisec, TessResultRenderer *renderer);
-
-  /**
-   * Turn a single image into symbolic text.
-   *
-   * The pix is the image processed. filename and page_index are
-   * metadata used by side-effect processes, such as reading a box
-   * file or formatting as hOCR.
-   *
-   * See ProcessPages for descriptions of other parameters.
-   */
-  bool ProcessPage(Pix *pix, int page_index, const char *filename,
-                   const char *retry_config, int timeout_millisec,
-                   TessResultRenderer *renderer);
-
-  /**
-   * Get a reading-order iterator to the results of LayoutAnalysis and/or
-   * Recognize. The returned iterator must be deleted after use.
-   * WARNING! This class points to data held within the TessBaseAPI class, and
-   * therefore can only be used while the TessBaseAPI class still exists and
-   * has not been subjected to a call of Init, SetImage, Recognize, Clear, End
-   * DetectOS, or anything else that changes the internal PAGE_RES.
-   */
-  ResultIterator *GetIterator();
-
-  /**
-   * Get a mutable iterator to the results of LayoutAnalysis and/or Recognize.
-   * The returned iterator must be deleted after use.
-   * WARNING! This class points to data held within the TessBaseAPI class, and
-   * therefore can only be used while the TessBaseAPI class still exists and
-   * has not been subjected to a call of Init, SetImage, Recognize, Clear, End
-   * DetectOS, or anything else that changes the internal PAGE_RES.
-   */
-  MutableIterator *GetMutableIterator();
-
-  /**
-   * The recognized text is returned as a char* which is coded
-   * as UTF8 and must be freed with the delete [] operator.
-   */
-  char *GetUTF8Text();
-
-  /**
-   * Make a HTML-formatted string with hOCR markup from the internal
-   * data structures.
-   * page_number is 0-based but will appear in the output as 1-based.
-   * monitor can be used to
-   *  cancel the recognition
-   *  receive progress callbacks
-   * Returned string must be freed with the delete [] operator.
-   */
-  char *GetHOCRText(ETEXT_DESC *monitor, int page_number);
-
-  /**
-   * Make a HTML-formatted string with hOCR markup from the internal
-   * data structures.
-   * page_number is 0-based but will appear in the output as 1-based.
-   * Returned string must be freed with the delete [] operator.
-   */
-  char *GetHOCRText(int page_number);
-
-  /**
-   * Make an XML-formatted string with Alto markup from the internal
-   * data structures.
-   */
-  char *GetAltoText(ETEXT_DESC *monitor, int page_number);
-
-  /**
-   * Make an XML-formatted string with Alto markup from the internal
-   * data structures.
-   */
-  char *GetAltoText(int page_number);
-
-  /**
-   * Make a TSV-formatted string from the internal data structures.
-   * page_number is 0-based but will appear in the output as 1-based.
-   * Returned string must be freed with the delete [] operator.
-   */
-  char *GetTSVText(int page_number);
-
-  /**
-   * Make a box file for LSTM training from the internal data structures.
-   * Constructs coordinates in the original image - not just the rectangle.
-   * page_number is a 0-based page index that will appear in the box file.
-   * Returned string must be freed with the delete [] operator.
-   */
-  char *GetLSTMBoxText(int page_number);
-
-  /**
-   * The recognized text is returned as a char* which is coded in the same
-   * format as a box file used in training.
-   * Constructs coordinates in the original image - not just the rectangle.
-   * page_number is a 0-based page index that will appear in the box file.
-   * Returned string must be freed with the delete [] operator.
-   */
-  char *GetBoxText(int page_number);
-
-  /**
-   * The recognized text is returned as a char* which is coded in the same
-   * format as a WordStr box file used in training.
-   * page_number is a 0-based page index that will appear in the box file.
-   * Returned string must be freed with the delete [] operator.
-   */
-  char *GetWordStrBoxText(int page_number);
-
-  /**
-   * The recognized text is returned as a char* which is coded
-   * as UNLV format Latin-1 with specific reject and suspect codes.
-   * Returned string must be freed with the delete [] operator.
-   */
-  char *GetUNLVText();
-
-  /**
-   * Detect the orientation of the input image and apparent script (alphabet).
-   * orient_deg is the detected clockwise rotation of the input image in degrees
-   * (0, 90, 180, 270)
-   * orient_conf is the confidence (15.0 is reasonably confident)
-   * script_name is an ASCII string, the name of the script, e.g. "Latin"
-   * script_conf is confidence level in the script
-   * Returns true on success and writes values to each parameter as an output
-   */
-  bool DetectOrientationScript(int *orient_deg, float *orient_conf,
-                               const char **script_name, float *script_conf);
-
-  /**
-   * The recognized text is returned as a char* which is coded
-   * as UTF8 and must be freed with the delete [] operator.
-   * page_number is a 0-based page index that will appear in the osd file.
-   */
-  char *GetOsdText(int page_number);
-
-  /** Returns the (average) confidence value between 0 and 100. */
-  int MeanTextConf();
-  /**
-   * Returns all word confidences (between 0 and 100) in an array, terminated
-   * by -1.  The calling function must delete [] after use.
-   * The number of confidences should correspond to the number of space-
-   * delimited words in GetUTF8Text.
-   */
-  int *AllWordConfidences();
-
-#ifndef DISABLED_LEGACY_ENGINE
-  /**
-   * Applies the given word to the adaptive classifier if possible.
-   * The word must be SPACE-DELIMITED UTF-8 - l i k e t h i s , so it can
-   * tell the boundaries of the graphemes.
-   * Assumes that SetImage/SetRectangle have been used to set the image
-   * to the given word. The mode arg should be PSM_SINGLE_WORD or
-   * PSM_CIRCLE_WORD, as that will be used to control layout analysis.
-   * The currently set PageSegMode is preserved.
-   * Returns false if adaption was not possible for some reason.
-   */
-  bool AdaptToWordStr(PageSegMode mode, const char *wordstr);
-#endif //  ndef DISABLED_LEGACY_ENGINE
-
-  /**
-   * Free up recognition results and any stored image data, without actually
-   * freeing any recognition data that would be time-consuming to reload.
-   * Afterwards, you must call SetImage or TesseractRect before doing
-   * any Recognize or Get* operation.
-   */
-  void Clear();
-
-  /**
-   * Close down tesseract and free up all memory. End() is equivalent to
-   * destructing and reconstructing your TessBaseAPI.
-   * Once End() has been used, none of the other API functions may be used
-   * other than Init and anything declared above it in the class definition.
-   */
-  void End();
-
-  /**
-   * Clear any library-level memory caches.
-   * There are a variety of expensive-to-load constant data structures (mostly
-   * language dictionaries) that are cached globally -- surviving the Init()
-   * and End() of individual TessBaseAPI's.  This function allows the clearing
-   * of these caches.
-   **/
-  static void ClearPersistentCache();
-
-  /**
-   * Check whether a word is valid according to Tesseract's language model
-   * @return 0 if the word is invalid, non-zero if valid.
-   * @warning temporary! This function will be removed from here and placed
-   * in a separate API at some future time.
-   */
-  int IsValidWord(const char *word) const;
-  // Returns true if utf8_character is defined in the UniCharset.
-  bool IsValidCharacter(const char *utf8_character) const;
-
-  bool GetTextDirection(int *out_offset, float *out_slope);
-
-  /** Sets Dict::letter_is_okay_ function to point to the given function. */
-  void SetDictFunc(DictFunc f);
-
-  /** Sets Dict::probability_in_context_ function to point to the given
-   * function.
-   */
-  void SetProbabilityInContextFunc(ProbabilityInContextFunc f);
-
-  /**
-   * Estimates the Orientation And Script of the image.
-   * @return true if the image was processed successfully.
-   */
-  bool DetectOS(OSResults *);
-
-  /**
-   * Return text orientation of each block as determined by an earlier run
-   * of layout analysis.
-   */
-  void GetBlockTextOrientations(int **block_orientation,
-                                bool **vertical_writing);
-
-  /** This method returns the string form of the specified unichar. */
-  const char *GetUnichar(int unichar_id) const;
-
-  /** Return the pointer to the i-th dawg loaded into tesseract_ object. */
-  const Dawg *GetDawg(int i) const;
-
-  /** Return the number of dawgs loaded into tesseract_ object. */
-  int NumDawgs() const;
-
-  Tesseract *tesseract() const {
-    return tesseract_;
-  }
-
-  OcrEngineMode oem() const {
-    return last_oem_requested_;
-  }
-
-  void set_min_orientation_margin(double margin);
-  /* @} */
-
-protected:
-  /** Common code for setting the image. Returns true if Init has been called.
-   */
-  bool InternalSetImage();
-
-  /**
-   * Run the thresholder to make the thresholded image. If pix is not nullptr,
-   * the source is thresholded to pix instead of the internal IMAGE.
-   */
-  virtual bool Threshold(Pix **pix);
-
-  /**
-   * Find lines from the image making the BLOCK_LIST.
-   * @return 0 on success.
-   */
-  int FindLines();
-
-  /** Delete the pageres and block list ready for a new page. */
-  void ClearResults();
-
-  /**
-   * Return an LTR Result Iterator -- used only for training, as we really want
-   * to ignore all BiDi smarts at that point.
-   * delete once you're done with it.
-   */
-  LTRResultIterator *GetLTRIterator();
-
-  /**
-   * Return the length of the output text string, as UTF8, assuming
-   * one newline per line and one per block, with a terminator,
-   * and assuming a single character reject marker for each rejected character.
-   * Also return the number of recognized blobs in blob_count.
-   */
-  int TextLength(int *blob_count) const;
-
-  //// paragraphs.cpp ////////////////////////////////////////////////////
-  void DetectParagraphs(bool after_text_recognition);
-
-  const PAGE_RES *GetPageRes() const {
-    return page_res_;
-  }
-
-protected:
-  Tesseract *tesseract_;          ///< The underlying data object.
-  Tesseract *osd_tesseract_;      ///< For orientation & script detection.
-  EquationDetect *equ_detect_;    ///< The equation detector.
-  FileReader reader_;             ///< Reads files from any filesystem.
-  ImageThresholder *thresholder_; ///< Image thresholding module.
-  std::vector<ParagraphModel *> *paragraph_models_;
-  BLOCK_LIST *block_list_;           ///< The page layout.
-  PAGE_RES *page_res_;               ///< The page-level data.
-  std::string input_file_;           ///< Name used by training code.
-  std::string output_file_;          ///< Name used by debug code.
-  std::string datapath_;             ///< Current location of tessdata.
-  std::string language_;             ///< Last initialized language.
-  OcrEngineMode last_oem_requested_; ///< Last ocr language mode requested.
-  bool recognition_done_;            ///< page_res_ contains recognition data.
-
-  /**
-   * @defgroup ThresholderParams Thresholder Parameters
-   * Parameters saved from the Thresholder. Needed to rebuild coordinates.
-   */
-  /* @{ */
-  int rect_left_;
-  int rect_top_;
-  int rect_width_;
-  int rect_height_;
-  int image_width_;
-  int image_height_;
-  /* @} */
-
-private:
-  // A list of image filenames gets special consideration
-  bool ProcessPagesFileList(FILE *fp, std::string *buf,
-                            const char *retry_config, int timeout_millisec,
-                            TessResultRenderer *renderer,
-                            int tessedit_page_number);
-  // TIFF supports multipage so gets special consideration.
-  bool ProcessPagesMultipageTiff(const unsigned char *data, size_t size,
-                                 const char *filename, const char *retry_config,
-                                 int timeout_millisec,
-                                 TessResultRenderer *renderer,
-                                 int tessedit_page_number);
-}; // class TessBaseAPI.
-
-/** Escape a char string - remove &<>"' with HTML codes. */
-std::string HOcrEscape(const char *text);
-
-} // namespace tesseract
-
-#endif // TESSERACT_API_BASEAPI_H_
--- a/third_party/ocr/tesseract-ocr/uos/loongarch64/include/tesseract/capi.h
+++ b/third_party/ocr/tesseract-ocr/uos/loongarch64/include/tesseract/capi.h
@ -1,484 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// File:        capi.h
-// Description: C-API TessBaseAPI
-//
-// (C) Copyright 2012, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef API_CAPI_H_
-#define API_CAPI_H_
-
-#include "export.h"
-
-#ifdef __cplusplus
-#  include <tesseract/baseapi.h>
-#  include <tesseract/ocrclass.h>
-#  include <tesseract/pageiterator.h>
-#  include <tesseract/renderer.h>
-#  include <tesseract/resultiterator.h>
-#endif
-
-#include <stdbool.h>
-#include <stdio.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#ifndef BOOL
-#  define BOOL int
-#  define TRUE 1
-#  define FALSE 0
-#endif
-
-#ifdef __cplusplus
-typedef tesseract::TessResultRenderer TessResultRenderer;
-typedef tesseract::TessBaseAPI TessBaseAPI;
-typedef tesseract::PageIterator TessPageIterator;
-typedef tesseract::ResultIterator TessResultIterator;
-typedef tesseract::MutableIterator TessMutableIterator;
-typedef tesseract::ChoiceIterator TessChoiceIterator;
-typedef tesseract::OcrEngineMode TessOcrEngineMode;
-typedef tesseract::PageSegMode TessPageSegMode;
-typedef tesseract::PageIteratorLevel TessPageIteratorLevel;
-typedef tesseract::Orientation TessOrientation;
-typedef tesseract::ParagraphJustification TessParagraphJustification;
-typedef tesseract::WritingDirection TessWritingDirection;
-typedef tesseract::TextlineOrder TessTextlineOrder;
-typedef tesseract::PolyBlockType TessPolyBlockType;
-typedef tesseract::ETEXT_DESC ETEXT_DESC;
-#else
-typedef struct TessResultRenderer TessResultRenderer;
-typedef struct TessBaseAPI TessBaseAPI;
-typedef struct TessPageIterator TessPageIterator;
-typedef struct TessResultIterator TessResultIterator;
-typedef struct TessMutableIterator TessMutableIterator;
-typedef struct TessChoiceIterator TessChoiceIterator;
-typedef enum TessOcrEngineMode {
-  OEM_TESSERACT_ONLY,
-  OEM_LSTM_ONLY,
-  OEM_TESSERACT_LSTM_COMBINED,
-  OEM_DEFAULT
-} TessOcrEngineMode;
-typedef enum TessPageSegMode {
-  PSM_OSD_ONLY,
-  PSM_AUTO_OSD,
-  PSM_AUTO_ONLY,
-  PSM_AUTO,
-  PSM_SINGLE_COLUMN,
-  PSM_SINGLE_BLOCK_VERT_TEXT,
-  PSM_SINGLE_BLOCK,
-  PSM_SINGLE_LINE,
-  PSM_SINGLE_WORD,
-  PSM_CIRCLE_WORD,
-  PSM_SINGLE_CHAR,
-  PSM_SPARSE_TEXT,
-  PSM_SPARSE_TEXT_OSD,
-  PSM_RAW_LINE,
-  PSM_COUNT
-} TessPageSegMode;
-typedef enum TessPageIteratorLevel {
-  RIL_BLOCK,
-  RIL_PARA,
-  RIL_TEXTLINE,
-  RIL_WORD,
-  RIL_SYMBOL
-} TessPageIteratorLevel;
-typedef enum TessPolyBlockType {
-  PT_UNKNOWN,
-  PT_FLOWING_TEXT,
-  PT_HEADING_TEXT,
-  PT_PULLOUT_TEXT,
-  PT_EQUATION,
-  PT_INLINE_EQUATION,
-  PT_TABLE,
-  PT_VERTICAL_TEXT,
-  PT_CAPTION_TEXT,
-  PT_FLOWING_IMAGE,
-  PT_HEADING_IMAGE,
-  PT_PULLOUT_IMAGE,
-  PT_HORZ_LINE,
-  PT_VERT_LINE,
-  PT_NOISE,
-  PT_COUNT
-} TessPolyBlockType;
-typedef enum TessOrientation {
-  ORIENTATION_PAGE_UP,
-  ORIENTATION_PAGE_RIGHT,
-  ORIENTATION_PAGE_DOWN,
-  ORIENTATION_PAGE_LEFT
-} TessOrientation;
-typedef enum TessParagraphJustification {
-  JUSTIFICATION_UNKNOWN,
-  JUSTIFICATION_LEFT,
-  JUSTIFICATION_CENTER,
-  JUSTIFICATION_RIGHT
-} TessParagraphJustification;
-typedef enum TessWritingDirection {
-  WRITING_DIRECTION_LEFT_TO_RIGHT,
-  WRITING_DIRECTION_RIGHT_TO_LEFT,
-  WRITING_DIRECTION_TOP_TO_BOTTOM
-} TessWritingDirection;
-typedef enum TessTextlineOrder {
-  TEXTLINE_ORDER_LEFT_TO_RIGHT,
-  TEXTLINE_ORDER_RIGHT_TO_LEFT,
-  TEXTLINE_ORDER_TOP_TO_BOTTOM
-} TessTextlineOrder;
-typedef struct ETEXT_DESC ETEXT_DESC;
-#endif
-
-typedef bool (*TessCancelFunc)(void *cancel_this, int words);
-typedef bool (*TessProgressFunc)(ETEXT_DESC *ths, int left, int right, int top,
-                                 int bottom);
-
-struct Pix;
-struct Boxa;
-struct Pixa;
-
-/* General free functions */
-
-TESS_API const char *TessVersion();
-TESS_API void TessDeleteText(const char *text);
-TESS_API void TessDeleteTextArray(char **arr);
-TESS_API void TessDeleteIntArray(const int *arr);
-
-/* Renderer API */
-TESS_API TessResultRenderer *TessTextRendererCreate(const char *outputbase);
-TESS_API TessResultRenderer *TessHOcrRendererCreate(const char *outputbase);
-TESS_API TessResultRenderer *TessHOcrRendererCreate2(const char *outputbase,
-                                                     BOOL font_info);
-TESS_API TessResultRenderer *TessAltoRendererCreate(const char *outputbase);
-TESS_API TessResultRenderer *TessTsvRendererCreate(const char *outputbase);
-TESS_API TessResultRenderer *TessPDFRendererCreate(const char *outputbase,
-                                                   const char *datadir,
-                                                   BOOL textonly);
-TESS_API TessResultRenderer *TessUnlvRendererCreate(const char *outputbase);
-TESS_API TessResultRenderer *TessBoxTextRendererCreate(const char *outputbase);
-TESS_API TessResultRenderer *TessLSTMBoxRendererCreate(const char *outputbase);
-TESS_API TessResultRenderer *TessWordStrBoxRendererCreate(
-    const char *outputbase);
-
-TESS_API void TessDeleteResultRenderer(TessResultRenderer *renderer);
-TESS_API void TessResultRendererInsert(TessResultRenderer *renderer,
-                                       TessResultRenderer *next);
-TESS_API TessResultRenderer *TessResultRendererNext(
-    TessResultRenderer *renderer);
-TESS_API BOOL TessResultRendererBeginDocument(TessResultRenderer *renderer,
-                                              const char *title);
-TESS_API BOOL TessResultRendererAddImage(TessResultRenderer *renderer,
-                                         TessBaseAPI *api);
-TESS_API BOOL TessResultRendererEndDocument(TessResultRenderer *renderer);
-
-TESS_API const char *TessResultRendererExtention(TessResultRenderer *renderer);
-TESS_API const char *TessResultRendererTitle(TessResultRenderer *renderer);
-TESS_API int TessResultRendererImageNum(TessResultRenderer *renderer);
-
-/* Base API */
-
-TESS_API TessBaseAPI *TessBaseAPICreate();
-TESS_API void TessBaseAPIDelete(TessBaseAPI *handle);
-
-TESS_API size_t TessBaseAPIGetOpenCLDevice(TessBaseAPI *handle, void **device);
-
-TESS_API void TessBaseAPISetInputName(TessBaseAPI *handle, const char *name);
-TESS_API const char *TessBaseAPIGetInputName(TessBaseAPI *handle);
-
-TESS_API void TessBaseAPISetInputImage(TessBaseAPI *handle, struct Pix *pix);
-TESS_API struct Pix *TessBaseAPIGetInputImage(TessBaseAPI *handle);
-
-TESS_API int TessBaseAPIGetSourceYResolution(TessBaseAPI *handle);
-TESS_API const char *TessBaseAPIGetDatapath(TessBaseAPI *handle);
-
-TESS_API void TessBaseAPISetOutputName(TessBaseAPI *handle, const char *name);
-
-TESS_API BOOL TessBaseAPISetVariable(TessBaseAPI *handle, const char *name,
-                                     const char *value);
-TESS_API BOOL TessBaseAPISetDebugVariable(TessBaseAPI *handle, const char *name,
-                                          const char *value);
-
-TESS_API BOOL TessBaseAPIGetIntVariable(const TessBaseAPI *handle,
-                                        const char *name, int *value);
-TESS_API BOOL TessBaseAPIGetBoolVariable(const TessBaseAPI *handle,
-                                         const char *name, BOOL *value);
-TESS_API BOOL TessBaseAPIGetDoubleVariable(const TessBaseAPI *handle,
-                                           const char *name, double *value);
-TESS_API const char *TessBaseAPIGetStringVariable(const TessBaseAPI *handle,
-                                                  const char *name);
-
-TESS_API void TessBaseAPIPrintVariables(const TessBaseAPI *handle, FILE *fp);
-TESS_API BOOL TessBaseAPIPrintVariablesToFile(const TessBaseAPI *handle,
-                                              const char *filename);
-
-TESS_API int TessBaseAPIInit1(TessBaseAPI *handle, const char *datapath,
-                              const char *language, TessOcrEngineMode oem,
-                              char **configs, int configs_size);
-TESS_API int TessBaseAPIInit2(TessBaseAPI *handle, const char *datapath,
-                              const char *language, TessOcrEngineMode oem);
-TESS_API int TessBaseAPIInit3(TessBaseAPI *handle, const char *datapath,
-                              const char *language);
-
-TESS_API int TessBaseAPIInit4(TessBaseAPI *handle, const char *datapath,
-                              const char *language, TessOcrEngineMode mode,
-                              char **configs, int configs_size, char **vars_vec,
-                              char **vars_values, size_t vars_vec_size,
-                              BOOL set_only_non_debug_params);
-
-TESS_API int TessBaseAPIInit5(TessBaseAPI *handle, const char *data, int data_size,
-                              const char *language, TessOcrEngineMode mode,
-                              char **configs, int configs_size, char **vars_vec,
-                              char **vars_values, size_t vars_vec_size,
-                              BOOL set_only_non_debug_params);
-
-TESS_API const char *TessBaseAPIGetInitLanguagesAsString(
-    const TessBaseAPI *handle);
-TESS_API char **TessBaseAPIGetLoadedLanguagesAsVector(
-    const TessBaseAPI *handle);
-TESS_API char **TessBaseAPIGetAvailableLanguagesAsVector(
-    const TessBaseAPI *handle);
-
-TESS_API void TessBaseAPIInitForAnalysePage(TessBaseAPI *handle);
-
-TESS_API void TessBaseAPIReadConfigFile(TessBaseAPI *handle,
-                                        const char *filename);
-TESS_API void TessBaseAPIReadDebugConfigFile(TessBaseAPI *handle,
-                                             const char *filename);
-
-TESS_API void TessBaseAPISetPageSegMode(TessBaseAPI *handle,
-                                        TessPageSegMode mode);
-TESS_API TessPageSegMode TessBaseAPIGetPageSegMode(const TessBaseAPI *handle);
-
-TESS_API char *TessBaseAPIRect(TessBaseAPI *handle,
-                               const unsigned char *imagedata,
-                               int bytes_per_pixel, int bytes_per_line,
-                               int left, int top, int width, int height);
-
-TESS_API void TessBaseAPIClearAdaptiveClassifier(TessBaseAPI *handle);
-
-TESS_API void TessBaseAPISetImage(TessBaseAPI *handle,
-                                  const unsigned char *imagedata, int width,
-                                  int height, int bytes_per_pixel,
-                                  int bytes_per_line);
-TESS_API void TessBaseAPISetImage2(TessBaseAPI *handle, struct Pix *pix);
-
-TESS_API void TessBaseAPISetSourceResolution(TessBaseAPI *handle, int ppi);
-
-TESS_API void TessBaseAPISetRectangle(TessBaseAPI *handle, int left, int top,
-                                      int width, int height);
-
-TESS_API struct Pix *TessBaseAPIGetThresholdedImage(TessBaseAPI *handle);
-TESS_API struct Boxa *TessBaseAPIGetRegions(TessBaseAPI *handle,
-                                            struct Pixa **pixa);
-TESS_API struct Boxa *TessBaseAPIGetTextlines(TessBaseAPI *handle,
-                                              struct Pixa **pixa,
-                                              int **blockids);
-TESS_API struct Boxa *TessBaseAPIGetTextlines1(TessBaseAPI *handle,
-                                               BOOL raw_image, int raw_padding,
-                                               struct Pixa **pixa,
-                                               int **blockids, int **paraids);
-TESS_API struct Boxa *TessBaseAPIGetStrips(TessBaseAPI *handle,
-                                           struct Pixa **pixa, int **blockids);
-TESS_API struct Boxa *TessBaseAPIGetWords(TessBaseAPI *handle,
-                                          struct Pixa **pixa);
-TESS_API struct Boxa *TessBaseAPIGetConnectedComponents(TessBaseAPI *handle,
-                                                        struct Pixa **cc);
-TESS_API struct Boxa *TessBaseAPIGetComponentImages(TessBaseAPI *handle,
-                                                    TessPageIteratorLevel level,
-                                                    BOOL text_only,
-                                                    struct Pixa **pixa,
-                                                    int **blockids);
-TESS_API struct Boxa *TessBaseAPIGetComponentImages1(
-    TessBaseAPI *handle, TessPageIteratorLevel level, BOOL text_only,
-    BOOL raw_image, int raw_padding, struct Pixa **pixa, int **blockids,
-    int **paraids);
-
-TESS_API int TessBaseAPIGetThresholdedImageScaleFactor(
-    const TessBaseAPI *handle);
-
-TESS_API TessPageIterator *TessBaseAPIAnalyseLayout(TessBaseAPI *handle);
-
-TESS_API int TessBaseAPIRecognize(TessBaseAPI *handle, ETEXT_DESC *monitor);
-
-TESS_API BOOL TessBaseAPIProcessPages(TessBaseAPI *handle, const char *filename,
-                                      const char *retry_config,
-                                      int timeout_millisec,
-                                      TessResultRenderer *renderer);
-TESS_API BOOL TessBaseAPIProcessPage(TessBaseAPI *handle, struct Pix *pix,
-                                     int page_index, const char *filename,
-                                     const char *retry_config,
-                                     int timeout_millisec,
-                                     TessResultRenderer *renderer);
-
-TESS_API TessResultIterator *TessBaseAPIGetIterator(TessBaseAPI *handle);
-TESS_API TessMutableIterator *TessBaseAPIGetMutableIterator(
-    TessBaseAPI *handle);
-
-TESS_API char *TessBaseAPIGetUTF8Text(TessBaseAPI *handle);
-TESS_API char *TessBaseAPIGetHOCRText(TessBaseAPI *handle, int page_number);
-
-TESS_API char *TessBaseAPIGetAltoText(TessBaseAPI *handle, int page_number);
-TESS_API char *TessBaseAPIGetTsvText(TessBaseAPI *handle, int page_number);
-
-TESS_API char *TessBaseAPIGetBoxText(TessBaseAPI *handle, int page_number);
-TESS_API char *TessBaseAPIGetLSTMBoxText(TessBaseAPI *handle, int page_number);
-TESS_API char *TessBaseAPIGetWordStrBoxText(TessBaseAPI *handle,
-                                            int page_number);
-
-TESS_API char *TessBaseAPIGetUNLVText(TessBaseAPI *handle);
-TESS_API int TessBaseAPIMeanTextConf(TessBaseAPI *handle);
-
-TESS_API int *TessBaseAPIAllWordConfidences(TessBaseAPI *handle);
-
-#ifndef DISABLED_LEGACY_ENGINE
-TESS_API BOOL TessBaseAPIAdaptToWordStr(TessBaseAPI *handle,
-                                        TessPageSegMode mode,
-                                        const char *wordstr);
-#endif // #ifndef DISABLED_LEGACY_ENGINE
-
-TESS_API void TessBaseAPIClear(TessBaseAPI *handle);
-TESS_API void TessBaseAPIEnd(TessBaseAPI *handle);
-
-TESS_API int TessBaseAPIIsValidWord(TessBaseAPI *handle, const char *word);
-TESS_API BOOL TessBaseAPIGetTextDirection(TessBaseAPI *handle, int *out_offset,
-                                          float *out_slope);
-
-TESS_API const char *TessBaseAPIGetUnichar(TessBaseAPI *handle, int unichar_id);
-
-TESS_API void TessBaseAPIClearPersistentCache(TessBaseAPI *handle);
-
-#ifndef DISABLED_LEGACY_ENGINE
-
-// Call TessDeleteText(*best_script_name) to free memory allocated by this
-// function
-TESS_API BOOL TessBaseAPIDetectOrientationScript(TessBaseAPI *handle,
-                                                 int *orient_deg,
-                                                 float *orient_conf,
-                                                 const char **script_name,
-                                                 float *script_conf);
-#endif // #ifndef DISABLED_LEGACY_ENGINE
-
-TESS_API void TessBaseAPISetMinOrientationMargin(TessBaseAPI *handle,
-                                                 double margin);
-
-TESS_API int TessBaseAPINumDawgs(const TessBaseAPI *handle);
-
-TESS_API TessOcrEngineMode TessBaseAPIOem(const TessBaseAPI *handle);
-
-TESS_API void TessBaseGetBlockTextOrientations(TessBaseAPI *handle,
-                                               int **block_orientation,
-                                               bool **vertical_writing);
-
-/* Page iterator */
-
-TESS_API void TessPageIteratorDelete(TessPageIterator *handle);
-
-TESS_API TessPageIterator *TessPageIteratorCopy(const TessPageIterator *handle);
-
-TESS_API void TessPageIteratorBegin(TessPageIterator *handle);
-
-TESS_API BOOL TessPageIteratorNext(TessPageIterator *handle,
-                                   TessPageIteratorLevel level);
-
-TESS_API BOOL TessPageIteratorIsAtBeginningOf(const TessPageIterator *handle,
-                                              TessPageIteratorLevel level);
-
-TESS_API BOOL TessPageIteratorIsAtFinalElement(const TessPageIterator *handle,
-                                               TessPageIteratorLevel level,
-                                               TessPageIteratorLevel element);
-
-TESS_API BOOL TessPageIteratorBoundingBox(const TessPageIterator *handle,
-                                          TessPageIteratorLevel level,
-                                          int *left, int *top, int *right,
-                                          int *bottom);
-
-TESS_API TessPolyBlockType
-TessPageIteratorBlockType(const TessPageIterator *handle);
-
-TESS_API struct Pix *TessPageIteratorGetBinaryImage(
-    const TessPageIterator *handle, TessPageIteratorLevel level);
-
-TESS_API struct Pix *TessPageIteratorGetImage(const TessPageIterator *handle,
-                                              TessPageIteratorLevel level,
-                                              int padding,
-                                              struct Pix *original_image,
-                                              int *left, int *top);
-
-TESS_API BOOL TessPageIteratorBaseline(const TessPageIterator *handle,
-                                       TessPageIteratorLevel level, int *x1,
-                                       int *y1, int *x2, int *y2);
-
-TESS_API void TessPageIteratorOrientation(
-    TessPageIterator *handle, TessOrientation *orientation,
-    TessWritingDirection *writing_direction, TessTextlineOrder *textline_order,
-    float *deskew_angle);
-
-TESS_API void TessPageIteratorParagraphInfo(
-    TessPageIterator *handle, TessParagraphJustification *justification,
-    BOOL *is_list_item, BOOL *is_crown, int *first_line_indent);
-
-/* Result iterator */
-
-TESS_API void TessResultIteratorDelete(TessResultIterator *handle);
-TESS_API TessResultIterator *TessResultIteratorCopy(
-    const TessResultIterator *handle);
-TESS_API TessPageIterator *TessResultIteratorGetPageIterator(
-    TessResultIterator *handle);
-TESS_API const TessPageIterator *TessResultIteratorGetPageIteratorConst(
-    const TessResultIterator *handle);
-TESS_API TessChoiceIterator *TessResultIteratorGetChoiceIterator(
-    const TessResultIterator *handle);
-
-TESS_API BOOL TessResultIteratorNext(TessResultIterator *handle,
-                                     TessPageIteratorLevel level);
-TESS_API char *TessResultIteratorGetUTF8Text(const TessResultIterator *handle,
-                                             TessPageIteratorLevel level);
-TESS_API float TessResultIteratorConfidence(const TessResultIterator *handle,
-                                            TessPageIteratorLevel level);
-TESS_API const char *TessResultIteratorWordRecognitionLanguage(
-    const TessResultIterator *handle);
-TESS_API const char *TessResultIteratorWordFontAttributes(
-    const TessResultIterator *handle, BOOL *is_bold, BOOL *is_italic,
-    BOOL *is_underlined, BOOL *is_monospace, BOOL *is_serif, BOOL *is_smallcaps,
-    int *pointsize, int *font_id);
-
-TESS_API BOOL
-TessResultIteratorWordIsFromDictionary(const TessResultIterator *handle);
-TESS_API BOOL TessResultIteratorWordIsNumeric(const TessResultIterator *handle);
-TESS_API BOOL
-TessResultIteratorSymbolIsSuperscript(const TessResultIterator *handle);
-TESS_API BOOL
-TessResultIteratorSymbolIsSubscript(const TessResultIterator *handle);
-TESS_API BOOL
-TessResultIteratorSymbolIsDropcap(const TessResultIterator *handle);
-
-TESS_API void TessChoiceIteratorDelete(TessChoiceIterator *handle);
-TESS_API BOOL TessChoiceIteratorNext(TessChoiceIterator *handle);
-TESS_API const char *TessChoiceIteratorGetUTF8Text(
-    const TessChoiceIterator *handle);
-TESS_API float TessChoiceIteratorConfidence(const TessChoiceIterator *handle);
-
-/* Progress monitor */
-
-TESS_API ETEXT_DESC *TessMonitorCreate();
-TESS_API void TessMonitorDelete(ETEXT_DESC *monitor);
-TESS_API void TessMonitorSetCancelFunc(ETEXT_DESC *monitor,
-                                       TessCancelFunc cancelFunc);
-TESS_API void TessMonitorSetCancelThis(ETEXT_DESC *monitor, void *cancelThis);
-TESS_API void *TessMonitorGetCancelThis(ETEXT_DESC *monitor);
-TESS_API void TessMonitorSetProgressFunc(ETEXT_DESC *monitor,
-                                         TessProgressFunc progressFunc);
-TESS_API int TessMonitorGetProgress(ETEXT_DESC *monitor);
-TESS_API void TessMonitorSetDeadlineMSecs(ETEXT_DESC *monitor, int deadline);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif // API_CAPI_H_
--- a/third_party/ocr/tesseract-ocr/uos/loongarch64/include/tesseract/export.h
+++ b/third_party/ocr/tesseract-ocr/uos/loongarch64/include/tesseract/export.h
@ -1,37 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// File:        export.h
-// Description: Place holder
-//
-// (C) Copyright 2006, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef TESSERACT_PLATFORM_H_
-#define TESSERACT_PLATFORM_H_
-
-#ifndef TESS_API
-#  if defined(_WIN32) || defined(__CYGWIN__)
-#    if defined(TESS_EXPORTS)
-#      define TESS_API __declspec(dllexport)
-#    elif defined(TESS_IMPORTS)
-#      define TESS_API __declspec(dllimport)
-#    else
-#      define TESS_API
-#    endif
-#  else
-#    if defined(TESS_EXPORTS) || defined(TESS_IMPORTS)
-#      define TESS_API __attribute__((visibility("default")))
-#    else
-#      define TESS_API
-#    endif
-#  endif
-#endif
-
-#endif // TESSERACT_PLATFORM_H_
--- a/third_party/ocr/tesseract-ocr/uos/loongarch64/include/tesseract/ltrresultiterator.h
+++ b/third_party/ocr/tesseract-ocr/uos/loongarch64/include/tesseract/ltrresultiterator.h
@ -1,235 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// File:        ltrresultiterator.h
-// Description: Iterator for tesseract results in strict left-to-right
-//              order that avoids using tesseract internal data structures.
-// Author:      Ray Smith
-//
-// (C) Copyright 2010, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H_
-#define TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H_
-
-#include "export.h"       // for TESS_API
-#include "pageiterator.h" // for PageIterator
-#include "publictypes.h"  // for PageIteratorLevel
-#include "unichar.h"      // for StrongScriptDirection
-
-namespace tesseract {
-
-class BLOB_CHOICE_IT;
-class PAGE_RES;
-class WERD_RES;
-
-class Tesseract;
-
-// Class to iterate over tesseract results, providing access to all levels
-// of the page hierarchy, without including any tesseract headers or having
-// to handle any tesseract structures.
-// WARNING! This class points to data held within the TessBaseAPI class, and
-// therefore can only be used while the TessBaseAPI class still exists and
-// has not been subjected to a call of Init, SetImage, Recognize, Clear, End
-// DetectOS, or anything else that changes the internal PAGE_RES.
-// See tesseract/publictypes.h for the definition of PageIteratorLevel.
-// See also base class PageIterator, which contains the bulk of the interface.
-// LTRResultIterator adds text-specific methods for access to OCR output.
-
-class TESS_API LTRResultIterator : public PageIterator {
-  friend class ChoiceIterator;
-
-public:
-  // page_res and tesseract come directly from the BaseAPI.
-  // The rectangle parameters are copied indirectly from the Thresholder,
-  // via the BaseAPI. They represent the coordinates of some rectangle in an
-  // original image (in top-left-origin coordinates) and therefore the top-left
-  // needs to be added to any output boxes in order to specify coordinates
-  // in the original image. See TessBaseAPI::SetRectangle.
-  // The scale and scaled_yres are in case the Thresholder scaled the image
-  // rectangle prior to thresholding. Any coordinates in tesseract's image
-  // must be divided by scale before adding (rect_left, rect_top).
-  // The scaled_yres indicates the effective resolution of the binary image
-  // that tesseract has been given by the Thresholder.
-  // After the constructor, Begin has already been called.
-  LTRResultIterator(PAGE_RES *page_res, Tesseract *tesseract, int scale,
-                    int scaled_yres, int rect_left, int rect_top,
-                    int rect_width, int rect_height);
-
-  ~LTRResultIterator() override;
-
-  // LTRResultIterators may be copied! This makes it possible to iterate over
-  // all the objects at a lower level, while maintaining an iterator to
-  // objects at a higher level. These constructors DO NOT CALL Begin, so
-  // iterations will continue from the location of src.
-  // TODO: For now the copy constructor and operator= only need the base class
-  // versions, but if new data members are added, don't forget to add them!
-
-  // ============= Moving around within the page ============.
-
-  // See PageIterator.
-
-  // ============= Accessing data ==============.
-
-  // Returns the null terminated UTF-8 encoded text string for the current
-  // object at the given level. Use delete [] to free after use.
-  char *GetUTF8Text(PageIteratorLevel level) const;
-
-  // Set the string inserted at the end of each text line. "\n" by default.
-  void SetLineSeparator(const char *new_line);
-
-  // Set the string inserted at the end of each paragraph. "\n" by default.
-  void SetParagraphSeparator(const char *new_para);
-
-  // Returns the mean confidence of the current object at the given level.
-  // The number should be interpreted as a percent probability. (0.0f-100.0f)
-  float Confidence(PageIteratorLevel level) const;
-
-  // ============= Functions that refer to words only ============.
-
-  // Returns the font attributes of the current word. If iterating at a higher
-  // level object than words, eg textlines, then this will return the
-  // attributes of the first word in that textline.
-  // The actual return value is a string representing a font name. It points
-  // to an internal table and SHOULD NOT BE DELETED. Lifespan is the same as
-  // the iterator itself, ie rendered invalid by various members of
-  // TessBaseAPI, including Init, SetImage, End or deleting the TessBaseAPI.
-  // Pointsize is returned in printers points (1/72 inch.)
-  const char *WordFontAttributes(bool *is_bold, bool *is_italic,
-                                 bool *is_underlined, bool *is_monospace,
-                                 bool *is_serif, bool *is_smallcaps,
-                                 int *pointsize, int *font_id) const;
-
-  // Return the name of the language used to recognize this word.
-  // On error, nullptr.  Do not delete this pointer.
-  const char *WordRecognitionLanguage() const;
-
-  // Return the overall directionality of this word.
-  StrongScriptDirection WordDirection() const;
-
-  // Returns true if the current word was found in a dictionary.
-  bool WordIsFromDictionary() const;
-
-  // Returns the number of blanks before the current word.
-  int BlanksBeforeWord() const;
-
-  // Returns true if the current word is numeric.
-  bool WordIsNumeric() const;
-
-  // Returns true if the word contains blamer information.
-  bool HasBlamerInfo() const;
-
-  // Returns the pointer to ParamsTrainingBundle stored in the BlamerBundle
-  // of the current word.
-  const void *GetParamsTrainingBundle() const;
-
-  // Returns a pointer to the string with blamer information for this word.
-  // Assumes that the word's blamer_bundle is not nullptr.
-  const char *GetBlamerDebug() const;
-
-  // Returns a pointer to the string with misadaption information for this word.
-  // Assumes that the word's blamer_bundle is not nullptr.
-  const char *GetBlamerMisadaptionDebug() const;
-
-  // Returns true if a truth string was recorded for the current word.
-  bool HasTruthString() const;
-
-  // Returns true if the given string is equivalent to the truth string for
-  // the current word.
-  bool EquivalentToTruth(const char *str) const;
-
-  // Returns a null terminated UTF-8 encoded truth string for the current word.
-  // Use delete [] to free after use.
-  char *WordTruthUTF8Text() const;
-
-  // Returns a null terminated UTF-8 encoded normalized OCR string for the
-  // current word. Use delete [] to free after use.
-  char *WordNormedUTF8Text() const;
-
-  // Returns a pointer to serialized choice lattice.
-  // Fills lattice_size with the number of bytes in lattice data.
-  const char *WordLattice(int *lattice_size) const;
-
-  // ============= Functions that refer to symbols only ============.
-
-  // Returns true if the current symbol is a superscript.
-  // If iterating at a higher level object than symbols, eg words, then
-  // this will return the attributes of the first symbol in that word.
-  bool SymbolIsSuperscript() const;
-  // Returns true if the current symbol is a subscript.
-  // If iterating at a higher level object than symbols, eg words, then
-  // this will return the attributes of the first symbol in that word.
-  bool SymbolIsSubscript() const;
-  // Returns true if the current symbol is a dropcap.
-  // If iterating at a higher level object than symbols, eg words, then
-  // this will return the attributes of the first symbol in that word.
-  bool SymbolIsDropcap() const;
-
-protected:
-  const char *line_separator_;
-  const char *paragraph_separator_;
-};
-
-// Class to iterate over the classifier choices for a single RIL_SYMBOL.
-class TESS_API ChoiceIterator {
-public:
-  // Construction is from a LTRResultIterator that points to the symbol of
-  // interest. The ChoiceIterator allows a one-shot iteration over the
-  // choices for this symbol and after that it is useless.
-  explicit ChoiceIterator(const LTRResultIterator &result_it);
-  ~ChoiceIterator();
-
-  // Moves to the next choice for the symbol and returns false if there
-  // are none left.
-  bool Next();
-
-  // ============= Accessing data ==============.
-
-  // Returns the null terminated UTF-8 encoded text string for the current
-  // choice.
-  // NOTE: Unlike LTRResultIterator::GetUTF8Text, the return points to an
-  // internal structure and should NOT be delete[]ed to free after use.
-  const char *GetUTF8Text() const;
-
-  // Returns the confidence of the current choice depending on the used language
-  // data. If only LSTM traineddata is used the value range is 0.0f - 1.0f. All
-  // choices for one symbol should roughly add up to 1.0f.
-  // If only traineddata of the legacy engine is used, the number should be
-  // interpreted as a percent probability. (0.0f-100.0f) In this case
-  // probabilities won't add up to 100. Each one stands on its own.
-  float Confidence() const;
-
-  // Returns a vector containing all timesteps, which belong to the currently
-  // selected symbol. A timestep is a vector containing pairs of symbols and
-  // floating point numbers. The number states the probability for the
-  // corresponding symbol.
-  std::vector<std::vector<std::pair<const char *, float>>> *Timesteps() const;
-
-private:
-  // clears the remaining spaces out of the results and adapt the probabilities
-  void filterSpaces();
-  // Pointer to the WERD_RES object owned by the API.
-  WERD_RES *word_res_;
-  // Iterator over the blob choices.
-  BLOB_CHOICE_IT *choice_it_;
-  std::vector<std::pair<const char *, float>> *LSTM_choices_ = nullptr;
-  std::vector<std::pair<const char *, float>>::iterator LSTM_choice_it_;
-
-  const int *tstep_index_;
-  // regulates the rating granularity
-  double rating_coefficient_;
-  // leading blanks
-  int blanks_before_word_;
-  // true when there is lstm engine related trained data
-  bool oemLSTM_;
-};
-
-} // namespace tesseract.
-
-#endif // TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H_
--- a/third_party/ocr/tesseract-ocr/uos/loongarch64/include/tesseract/ocrclass.h
+++ b/third_party/ocr/tesseract-ocr/uos/loongarch64/include/tesseract/ocrclass.h
@ -1,158 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-/**********************************************************************
- * File:        ocrclass.h
- * Description: Class definitions and constants for the OCR API.
- * Author:      Hewlett-Packard Co
- *
- * (C) Copyright 1996, Hewlett-Packard Co.
- ** Licensed under the Apache License, Version 2.0 (the "License");
- ** you may not use this file except in compliance with the License.
- ** You may obtain a copy of the License at
- ** http://www.apache.org/licenses/LICENSE-2.0
- ** Unless required by applicable law or agreed to in writing, software
- ** distributed under the License is distributed on an "AS IS" BASIS,
- ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- ** See the License for the specific language governing permissions and
- ** limitations under the License.
- *
- **********************************************************************/
-
-/**********************************************************************
- * This file contains typedefs for all the structures used by
- * the HP OCR interface.
- * The structures are designed to allow them to be used with any
- * structure alignment up to 8.
- **********************************************************************/
-
-#ifndef CCUTIL_OCRCLASS_H_
-#define CCUTIL_OCRCLASS_H_
-
-#include <chrono>
-#include <ctime>
-
-namespace tesseract {
-
-/**********************************************************************
- * EANYCODE_CHAR
- * Description of a single character. The character code is defined by
- * the character set of the current font.
- * Output text is sent as an array of these structures.
- * Spaces and line endings in the output are represented in the
- * structures of the surrounding characters. They are not directly
- * represented as characters.
- * The first character in a word has a positive value of blanks.
- * Missing information should be set to the defaults in the comments.
- * If word bounds are known, but not character bounds, then the top and
- * bottom of each character should be those of the word. The left of the
- * first and right of the last char in each word should be set. All other
- * lefts and rights should be set to -1.
- * If set, the values of right and bottom are left+width and top+height.
- * Most of the members come directly from the parameters to ocr_append_char.
- * The formatting member uses the enhancement parameter and combines the
- * line direction stuff into the top 3 bits.
- * The coding is 0=RL char, 1=LR char, 2=DR NL, 3=UL NL, 4=DR Para,
- * 5=UL Para, 6=TB char, 7=BT char. API users do not need to know what
- * the coding is, only that it is backwards compatible with the previous
- * version.
- **********************************************************************/
-
-struct EANYCODE_CHAR { /* single character; (n) below is the default value */
-  // It should be noted that the format for char_code for version 2.0 and beyond
-  // is UTF8 which means that ASCII characters will come out as one structure
-  // but other characters will be returned in two or more instances of this
-  // structure with a single byte of the UTF8 code in each, but each will have
-  // the same bounding box. Programs which want to handle languages with
-  // different character sets will need to handle extended characters
-  // appropriately, but *all* code needs to be prepared to receive UTF8 coded
-  // characters for characters such as bullet and fancy quotes.
-  uint16_t char_code; /* character itself */
-  int16_t left;       /* left edge of char (-1) */
-  int16_t right;      /* right edge of char (-1) */
-  int16_t top;        /* top edge of char (-1) */
-  int16_t bottom;     /* bottom edge of char (-1) */
-  int16_t font_index; /* what font (0) */
-  uint8_t confidence; /* 0=perfect, 100=reject (0/100) */
-  uint8_t point_size; /* of char, 72=1 inch, (10) */
-  int8_t blanks;      /* number of spaces before this char (1) */
-  uint8_t formatting; /* char formatting, line direction in top 3 bits (0) */
-};
-
-/**********************************************************************
- * ETEXT_DESC
- * Description of the output of the OCR engine.
- * This structure is used as both a progress monitor and the final
- * output header, since it needs to be a valid progress monitor while
- * the OCR engine is storing its output to shared memory.
- * During progress, all the buffer info is -1.
- * Progress starts at 0 and increases to 100 during OCR. No other constraint.
- * Additionally the progress callback contains the bounding box of the word that
- * is currently being processed.
- * Every progress callback, the OCR engine must set ocr_alive to 1.
- * The HP side will set ocr_alive to 0. Repeated failure to reset
- * to 1 indicates that the OCR engine is dead.
- * If the cancel function is not null then it is called with the number of
- * user words found. If it returns true then operation is cancelled.
- **********************************************************************/
-class ETEXT_DESC; // forward declaration: PROGRESS_FUNC2 takes a pointer to it
-
-using CANCEL_FUNC = bool (*)(void *, int); // (user data - presumably cancel_this, words found); true cancels
-using PROGRESS_FUNC = bool (*)(int, int, int, int, int); // (progress, left, right, top, bottom)
-using PROGRESS_FUNC2 = bool (*)(ETEXT_DESC *, int, int, int, int); // (monitor, left, right, top, bottom)
-
-class ETEXT_DESC { // output header; also serves as the progress monitor
-public:
-  int16_t count{0};    ///< chars in this buffer(0)
-  int16_t progress{0}; ///< percent complete increasing (0-100)
-  /**< Progress monitor covers word recognition and it does not cover layout
-   * analysis.
-   * See Ray comment in https://github.com/tesseract-ocr/tesseract/pull/27 */
-  int8_t more_to_come{0};       ///< true if not last
-  volatile int8_t ocr_alive{0}; ///< ocr sets to 1, HP 0
-  int8_t err_code{0};           ///< for errcode use
-  CANCEL_FUNC cancel{nullptr};  ///< returns true to cancel
-  PROGRESS_FUNC progress_callback{
-      nullptr};                      ///< called whenever progress increases
-  PROGRESS_FUNC2 progress_callback2; ///< monitor-aware progress callback
-  void *cancel_this{nullptr};        ///< this or other data for cancel
-  std::chrono::steady_clock::time_point end_time;
-  ///< Time to stop. Expected to be set only
-  ///< by call to set_deadline_msecs().
-  EANYCODE_CHAR text[1]{}; ///< character data
-
-  ETEXT_DESC() : progress_callback2(&default_progress_func) {
-    end_time = std::chrono::time_point<std::chrono::steady_clock, // zero time_point == "no deadline"
-                                       std::chrono::milliseconds>();
-  }
-
-  // Sets end_time to deadline_msecs ms from now; values <= 0 leave it unchanged.
-  void set_deadline_msecs(int32_t deadline_msecs) {
-    if (deadline_msecs > 0) {
-      end_time = std::chrono::steady_clock::now() +
-                 std::chrono::milliseconds(deadline_msecs);
-    }
-  }
-
-  // Returns false if we've not passed the end_time, or have not set a deadline.
-  bool deadline_exceeded() const {
-    if (end_time.time_since_epoch() ==
-        std::chrono::steady_clock::duration::zero()) {
-      return false; // end_time is still at its zero default: no deadline set
-    }
-    auto now = std::chrono::steady_clock::now();
-    return (now > end_time);
-  }
-
-private: // default for progress_callback2: adapts it to the older progress_callback
-  static bool default_progress_func(ETEXT_DESC *ths, int left, int right,
-                                    int top, int bottom) {
-    if (ths->progress_callback != nullptr) {
-      return (*(ths->progress_callback))(ths->progress, left, right, top,
-                                         bottom);
-    }
-    return true; // no progress_callback installed
-  }
-};
-
-} // namespace tesseract
-
-#endif // CCUTIL_OCRCLASS_H_
--- a/third_party/ocr/tesseract-ocr/uos/loongarch64/include/tesseract/osdetect.h
+++ b/third_party/ocr/tesseract-ocr/uos/loongarch64/include/tesseract/osdetect.h
@ -1,139 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// File:        osdetect.h
-// Description: Orientation and script detection.
-// Author:      Samuel Charron
-//              Ranjith Unnikrishnan
-//
-// (C) Copyright 2008, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef TESSERACT_CCMAIN_OSDETECT_H_
-#define TESSERACT_CCMAIN_OSDETECT_H_
-
-#include "export.h" // for TESS_API
-
-#include <vector> // for std::vector
-
-namespace tesseract {
-
-class BLOBNBOX;
-class BLOBNBOX_CLIST;
-class BLOB_CHOICE_LIST;
-class TO_BLOCK_LIST;
-class UNICHARSET;
-
-class Tesseract;
-
-// Max number of scripts: 116 in ICU + 1 "NULL" + 2 (Japanese, Korean) + 1 Fraktur.
-const int kMaxNumberOfScripts = 116 + 1 + 2 + 1; // second dimension of OSResults::scripts_na
-
-struct OSBestResult { // one orientation id and one script id, each with a confidence
-  OSBestResult()
-      : orientation_id(0), script_id(0), sconfidence(0.0), oconfidence(0.0) {} // everything starts at 0
-  int orientation_id; // presumably an index into OSResults::orientations - confirm
-  int script_id;      // presumably a script index as returned by get_best_script - confirm
-  float sconfidence;  // presumably the script confidence - confirm
-  float oconfidence;  // presumably the orientation confidence - confirm
-};
-
-struct OSResults { // per-orientation and per-script scores plus the current best pick
-  OSResults() : unicharset(nullptr) {
-    for (int i = 0; i < 4; ++i) { // zero every orientation score and every script score
-      for (int j = 0; j < kMaxNumberOfScripts; ++j) {
-        scripts_na[i][j] = 0;
-      }
-      orientations[i] = 0;
-    }
-  }
-  void update_best_orientation(); // NOTE(review): undocumented; presumably refreshes best_result - confirm
-  // Set the estimate of the orientation to the given id.
-  void set_best_orientation(int orientation_id);
-  // Update/Compute the best estimate of the script assuming the given
-  // orientation id.
-  void update_best_script(int orientation_id);
-  // Return the index of the script with the highest score for this orientation.
-  TESS_API int get_best_script(int orientation_id) const;
-  // Accumulate scores with given OSResults instance and update the best script.
-  void accumulate(const OSResults &osr);
-
-  // Print statistics.
-  void print_scores(void) const;
-  void print_scores(int orientation_id) const;
-
-  // Array holding scores for each orientation id [0,3].
-  // Orientation ids [0..3] map to [0, 270, 180, 90] degree orientations of the
-  // page respectively, where the values refer to the amount of clockwise
-  // rotation to be applied to the page for the text to be upright and readable.
-  float orientations[4];
-  // Script confidence scores for each of 4 possible orientations.
-  float scripts_na[4][kMaxNumberOfScripts];
-
-  UNICHARSET *unicharset;   // nullptr after construction; ownership is not visible in this header
-  OSBestResult best_result; // default-constructed, i.e. all fields 0
-};
-
-class OrientationDetector { // declaration only; the implementation is not in this header
-public:
-  OrientationDetector(const std::vector<int> *allowed_scripts,
-                      OSResults *results);
-  bool detect_blob(BLOB_CHOICE_LIST *scores); // NOTE(review): meaning of the bool result is undocumented - confirm
-  int get_orientation(); // presumably an orientation id in [0,3] as used by OSResults - confirm
-
-private:
-  OSResults *osr_; // presumably the `results` constructor argument; ownership not visible here
-  const std::vector<int> *allowed_scripts_; // presumably the constructor argument of the same name
-};
-
-class ScriptDetector { // declaration only; the implementation is not in this header
-public:
-  ScriptDetector(const std::vector<int> *allowed_scripts, OSResults *osr,
-                 tesseract::Tesseract *tess);
-  void detect_blob(BLOB_CHOICE_LIST *scores);
-  bool must_stop(int orientation) const; // NOTE(review): stopping criterion is undocumented - confirm
-
-private:
-  OSResults *osr_; // presumably the `osr` constructor argument; ownership not visible here
-  static const char *korean_script_;   // script-name strings, defined outside this header
-  static const char *japanese_script_;
-  static const char *fraktur_script_;
-  int korean_id_; // the *_id_ members are presumably script ids cached at construction - confirm
-  int japanese_id_;
-  int katakana_id_;
-  int hiragana_id_;
-  int han_id_;
-  int hangul_id_;
-  int latin_id_;
-  int fraktur_id_;
-  tesseract::Tesseract *tess_; // presumably the `tess` constructor argument
-  const std::vector<int> *allowed_scripts_; // presumably the constructor argument of the same name
-};
-
-int orientation_and_script_detection(const char *filename, OSResults *,
-                                     tesseract::Tesseract *); // NOTE(review): return convention undocumented - confirm
-
-int os_detect(TO_BLOCK_LIST *port_blocks, OSResults *osr,
-              tesseract::Tesseract *tess); // NOTE(review): return convention undocumented - confirm
-
-int os_detect_blobs(const std::vector<int> *allowed_scripts,
-                    BLOBNBOX_CLIST *blob_list, OSResults *osr,
-                    tesseract::Tesseract *tess); // NOTE(review): return convention undocumented - confirm
-
-bool os_detect_blob(BLOBNBOX *bbox, OrientationDetector *o, ScriptDetector *s,
-                    OSResults *, tesseract::Tesseract *tess); // not marked TESS_API, unlike the helper below
-
-// Helper method to convert an orientation index to its value in degrees.
-// The value represents the amount of clockwise rotation in degrees that must be
-// applied for the text to be upright (readable).
-TESS_API int OrientationIdToValue(const int &id); // presumably the [0, 270, 180, 90] mapping of OSResults::orientations
-
-} // namespace tesseract
-
-#endif // TESSERACT_CCMAIN_OSDETECT_H_
--- a/third_party/ocr/tesseract-ocr/uos/loongarch64/include/tesseract/pageiterator.h
+++ b/third_party/ocr/tesseract-ocr/uos/loongarch64/include/tesseract/pageiterator.h
@ -1,364 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// File:        pageiterator.h
-// Description: Iterator for tesseract page structure that avoids using
-//              tesseract internal data structures.
-// Author:      Ray Smith
-//
-// (C) Copyright 2010, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef TESSERACT_CCMAIN_PAGEITERATOR_H_
-#define TESSERACT_CCMAIN_PAGEITERATOR_H_
-
-#include "export.h"
-#include "publictypes.h"
-
-struct Pix;
-struct Pta;
-
-namespace tesseract {
-
-struct BlamerBundle;
-class C_BLOB_IT;
-class PAGE_RES;
-class PAGE_RES_IT;
-class WERD;
-
-class Tesseract;
-
-/**
- * Class to iterate over tesseract page structure, providing access to all
- * levels of the page hierarchy, without including any tesseract headers or
- * having to handle any tesseract structures.
- * WARNING! This class points to data held within the TessBaseAPI class, and
- * therefore can only be used while the TessBaseAPI class still exists and
- * has not been subjected to a call of Init, SetImage, Recognize, Clear, End,
- * DetectOS, or anything else that changes the internal PAGE_RES.
- * See tesseract/publictypes.h for the definition of PageIteratorLevel.
- * See also ResultIterator, derived from PageIterator, which adds in the
- * ability to access OCR output with text-specific methods.
- */
-
-class TESS_API PageIterator {
-public:
-  /**
-   * page_res and tesseract come directly from the BaseAPI.
-   * The rectangle parameters are copied indirectly from the Thresholder,
-   * via the BaseAPI. They represent the coordinates of some rectangle in an
-   * original image (in top-left-origin coordinates) and therefore the top-left
-   * needs to be added to any output boxes in order to specify coordinates
-   * in the original image. See TessBaseAPI::SetRectangle.
-   * The scale and scaled_yres are in case the Thresholder scaled the image
-   * rectangle prior to thresholding. Any coordinates in tesseract's image
-   * must be divided by scale before adding (rect_left, rect_top).
-   * The scaled_yres indicates the effective resolution of the binary image
-   * that tesseract has been given by the Thresholder.
-   * After the constructor, Begin has already been called.
-   */
-  PageIterator(PAGE_RES *page_res, Tesseract *tesseract, int scale,
-               int scaled_yres, int rect_left, int rect_top, int rect_width,
-               int rect_height);
-  virtual ~PageIterator();
-
-  /**
-   * Page/ResultIterators may be copied! This makes it possible to iterate over
-   * all the objects at a lower level, while maintaining an iterator to
-   * objects at a higher level. These constructors DO NOT CALL Begin, so
-   * iterations will continue from the location of src.
-   */
-  PageIterator(const PageIterator &src);
-  const PageIterator &operator=(const PageIterator &src);
-
-  /** Are we positioned at the same location as other? */
-  bool PositionedAtSameWord(const PAGE_RES_IT *other) const;
-
-  // ============= Moving around within the page ============.
-
-  /**
-   * Moves the iterator to point to the start of the page to begin an
-   * iteration.
-   */
-  virtual void Begin();
-
-  /**
-   * Moves the iterator to the beginning of the paragraph.
-   * This class implements this functionality by moving it to the zero indexed
-   * blob of the first (leftmost) word on the first row of the paragraph.
-   */
-  virtual void RestartParagraph();
-
-  /**
-   * Return whether this iterator points anywhere in the first textline of a
-   * paragraph.
-   */
-  bool IsWithinFirstTextlineOfParagraph() const;
-
-  /**
-   * Moves the iterator to the beginning of the text line.
-   * This class implements this functionality by moving it to the zero indexed
-   * blob of the first (leftmost) word of the row.
-   */
-  virtual void RestartRow();
-
-  /**
-   * Moves to the start of the next object at the given level in the
-   * page hierarchy, and returns false if the end of the page was reached.
-   * NOTE that RIL_SYMBOL will skip non-text blocks, but all other
-   * PageIteratorLevel level values will visit each non-text block once.
-   * Think of non text blocks as containing a single para, with a single line,
-   * with a single imaginary word.
-   * Calls to Next with different levels may be freely intermixed.
-   * This function iterates words in right-to-left scripts correctly, if
-   * the appropriate language has been loaded into Tesseract.
-   */
-  virtual bool Next(PageIteratorLevel level);
-
-  /**
-   * Returns true if the iterator is at the start of an object at the given
-   * level.
-   *
-   * For instance, suppose an iterator it is pointed to the first symbol of the
-   * first word of the third line of the second paragraph of the first block in
-   * a page, then:
-   *   it.IsAtBeginningOf(RIL_BLOCK) = false
-   *   it.IsAtBeginningOf(RIL_PARA) = false
-   *   it.IsAtBeginningOf(RIL_TEXTLINE) = true
-   *   it.IsAtBeginningOf(RIL_WORD) = true
-   *   it.IsAtBeginningOf(RIL_SYMBOL) = true
-   */
-  virtual bool IsAtBeginningOf(PageIteratorLevel level) const;
-
-  /**
-   * Returns whether the iterator is positioned at the last element in a
-   * given level. (e.g. the last word in a line, the last line in a block)
-   *
-   *     Here's some two-paragraph example
-   *   text.  It starts off innocuously
-   *   enough but quickly turns bizarre.
-   *     The author inserts a cornucopia
-   *   of words to guard against confused
-   *   references.
-   *
-   * Now take an iterator it pointed to the start of "bizarre."
-   *  it.IsAtFinalElement(RIL_PARA, RIL_SYMBOL) = false
-   *  it.IsAtFinalElement(RIL_PARA, RIL_WORD) = true
-   *  it.IsAtFinalElement(RIL_BLOCK, RIL_WORD) = false
-   */
-  virtual bool IsAtFinalElement(PageIteratorLevel level,
-                                PageIteratorLevel element) const;
-
-  /**
-   * Returns whether this iterator is positioned
-   *   before other:   -1
-   *   equal to other:  0
-   *   after other:     1
-   */
-  int Cmp(const PageIterator &other) const;
-
-  // ============= Accessing data ==============.
-  // Coordinate system:
-  // Integer coordinates are at the cracks between the pixels.
-  // The top-left corner of the top-left pixel in the image is at (0,0).
-  // The bottom-right corner of the bottom-right pixel in the image is at
-  // (width, height).
-  // Every bounding box goes from the top-left of the top-left contained
-  // pixel to the bottom-right of the bottom-right contained pixel, so
-  // the bounding box of the single top-left pixel in the image is:
-  // (0,0)->(1,1).
-  // If an image rectangle has been set in the API, then returned coordinates
-  // relate to the original (full) image, rather than the rectangle.
-
-  /**
-   * Controls what to include in a bounding box. Bounding boxes of all levels
-   * between RIL_WORD and RIL_BLOCK can include or exclude potential diacritics.
-   * Between layout analysis and recognition, it isn't known where all
-   * diacritics belong, so this control is used to include or exclude some
-   * diacritics that are above or below the main body of the word. In most cases
-   * where the placement is obvious, and after recognition, it doesn't make as
-   * much difference, as the diacritics will already be included in the word.
-   */
-  void SetBoundingBoxComponents(bool include_upper_dots,
-                                bool include_lower_dots) {
-    include_upper_dots_ = include_upper_dots;
-    include_lower_dots_ = include_lower_dots;
-  }
-
-  /**
-   * Returns the bounding rectangle of the current object at the given level.
-   * See comment on coordinate system above.
-   * Returns false if there is no such object at the current position.
-   * The returned bounding box is guaranteed to match the size and position
-   * of the image returned by GetBinaryImage, but may clip foreground pixels
-   * from a grey image. The padding argument to GetImage can be used to expand
-   * the image to include more foreground pixels. See GetImage below.
-   */
-  bool BoundingBox(PageIteratorLevel level, int *left, int *top, int *right,
-                   int *bottom) const;
-  bool BoundingBox(PageIteratorLevel level, int padding, int *left, int *top,
-                   int *right, int *bottom) const;
-  /**
-   * Returns the bounding rectangle of the object in a coordinate system of the
-   * working image rectangle having its origin at (rect_left_, rect_top_) with
-   * respect to the original image and is scaled by a factor scale_.
-   */
-  bool BoundingBoxInternal(PageIteratorLevel level, int *left, int *top,
-                           int *right, int *bottom) const;
-
-  /** Returns whether there is no object of a given level. */
-  bool Empty(PageIteratorLevel level) const;
-
-  /**
-   * Returns the type of the current block.
-   * See tesseract/publictypes.h for PolyBlockType.
-   */
-  PolyBlockType BlockType() const;
-
-  /**
-   * Returns the polygon outline of the current block. The returned Pta must
-   * be ptaDestroy-ed after use. Note that the returned Pta lists the vertices
-   * of the polygon, and the last edge is the line segment between the last
-   * point and the first point. nullptr will be returned if the iterator is
-   * at the end of the document or layout analysis was not used.
-   */
-  Pta *BlockPolygon() const;
-
-  /**
-   * Returns a binary image of the current object at the given level.
-   * The position and size match the return from BoundingBoxInternal, and so
-   * this could be upscaled with respect to the original input image.
-   * Use pixDestroy to delete the image after use.
-   */
-  Pix *GetBinaryImage(PageIteratorLevel level) const;
-
-  /**
-   * Returns an image of the current object at the given level in greyscale
-   * if available in the input. To guarantee a binary image use GetBinaryImage.
-   * NOTE that in order to give the best possible image, the bounds are
-   * expanded slightly over the binary connected component, by the supplied
-   * padding, so the top-left position of the returned image is returned
-   * in (left,top). These will most likely not match the coordinates
-   * returned by BoundingBox.
-   * If you do not supply an original image, you will get a binary one.
-   * Use pixDestroy to delete the image after use.
-   */
-  Pix *GetImage(PageIteratorLevel level, int padding, Pix *original_img,
-                int *left, int *top) const;
-
-  /**
-   * Returns the baseline of the current object at the given level.
-   * The baseline is the line that passes through (x1, y1) and (x2, y2).
-   * WARNING: with vertical text, baselines may be vertical!
-   * Returns false if there is no baseline at the current position.
-   */
-  bool Baseline(PageIteratorLevel level, int *x1, int *y1, int *x2,
-                int *y2) const;
-
-  // Returns the attributes of the current row.
-  void RowAttributes(float *row_height, float *descenders,
-                     float *ascenders) const;
-
-  /**
-   * Returns orientation for the block the iterator points to.
-   *   orientation, writing_direction, textline_order: see publictypes.h
-   *   deskew_angle: after rotating the block so the text orientation is
-   *                 upright, how many radians does one have to rotate the
-   *                 block anti-clockwise for it to be level?
-   *                   -Pi/4 <= deskew_angle <= Pi/4
-   */
-  void Orientation(tesseract::Orientation *orientation,
-                   tesseract::WritingDirection *writing_direction,
-                   tesseract::TextlineOrder *textline_order,
-                   float *deskew_angle) const;
-
-  /**
-   * Returns information about the current paragraph, if available.
-   *
-   *   justification -
-   *     LEFT if ragged right, or fully justified and script is left-to-right.
-   *     RIGHT if ragged left, or fully justified and script is right-to-left.
-   *     unknown if it looks like source code or we have very few lines.
-   *   is_list_item -
-   *     true if we believe this is a member of an ordered or unordered list.
-   *   is_crown -
-   *     true if the first line of the paragraph is aligned with the other
-   *     lines of the paragraph even though subsequent paragraphs have first
-   *     line indents.  This typically indicates that this is the continuation
-   *     of a previous paragraph or that it is the very first paragraph in
-   *     the chapter.
-   *   first_line_indent -
-   *     For LEFT aligned paragraphs, the first text line of paragraphs of
-   *     this kind are indented this many pixels from the left edge of the
-   *     rest of the paragraph.
-   *     for RIGHT aligned paragraphs, the first text line of paragraphs of
-   *     this kind are indented this many pixels from the right edge of the
-   *     rest of the paragraph.
-   *     NOTE 1: This value may be negative.
-   *     NOTE 2: if *is_crown == true, the first line of this paragraph is
-   *             actually flush, and first_line_indent is set to the "common"
-   *             first_line_indent for subsequent paragraphs in this block
-   *             of text.
-   */
-  void ParagraphInfo(tesseract::ParagraphJustification *justification,
-                     bool *is_list_item, bool *is_crown,
-                     int *first_line_indent) const;
-
-  // If the current WERD_RES (it_->word()) is not nullptr, sets the BlamerBundle
-  // of the current word to the given pointer (takes ownership of the pointer)
-  // and returns true.
-  // Can only be used when iterating on the word level.
-  bool SetWordBlamerBundle(BlamerBundle *blamer_bundle);
-
-protected:
-  /**
-   * Sets up the internal data for iterating the blobs of a new word, then
-   * moves the iterator to the given offset.
-   */
-  void BeginWord(int offset);
-
-  /** Pointer to the page_res owned by the API. */
-  PAGE_RES *page_res_;
-  /** Pointer to the Tesseract object owned by the API. */
-  Tesseract *tesseract_;
-  /**
-   * The iterator to the page_res_. Owned by this iterator.
-   * A pointer just to avoid dragging in Tesseract includes.
-   */
-  PAGE_RES_IT *it_;
-  /**
-   * The current input WERD being iterated. If there is an output from OCR,
-   * then word_ is nullptr. Owned by the API.
-   */
-  WERD *word_;
-  /** The length of the current word_. */
-  int word_length_;
-  /** The current blob index within the word. */
-  int blob_index_;
-  /**
-   * Iterator to the blobs within the word. If nullptr, then we are iterating
-   * OCR results in the box_word.
-   * Owned by this iterator.
-   */
-  C_BLOB_IT *cblob_it_;
-  /** Control over what to include in bounding boxes. */
-  bool include_upper_dots_;
-  bool include_lower_dots_;
-  /** Parameters saved from the Thresholder. Needed to rebuild coordinates. */
-  int scale_;
-  int scaled_yres_;
-  int rect_left_;
-  int rect_top_;
-  int rect_width_;
-  int rect_height_;
-};
-
-} // namespace tesseract.
-
-#endif // TESSERACT_CCMAIN_PAGEITERATOR_H_
--- a/third_party/ocr/tesseract-ocr/uos/loongarch64/include/tesseract/publictypes.h
+++ b/third_party/ocr/tesseract-ocr/uos/loongarch64/include/tesseract/publictypes.h
@ -1,281 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// File:        publictypes.h
-// Description: Types used in both the API and internally
-// Author:      Ray Smith
-//
-// (C) Copyright 2010, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef TESSERACT_CCSTRUCT_PUBLICTYPES_H_
-#define TESSERACT_CCSTRUCT_PUBLICTYPES_H_
-
-namespace tesseract {
-
-// This file contains types that are used both by the API and internally
-// to Tesseract. In order to decouple the API from Tesseract and prevent cyclic
-// dependencies, THIS FILE SHOULD NOT DEPEND ON ANY OTHER PART OF TESSERACT.
-// Restated: It is OK for low-level Tesseract files to include publictypes.h,
-// but not for the low-level tesseract code to include top-level API code.
-// This file should not use other Tesseract types, as that would drag
-// their includes into the API-level.
-
-/** Number of printers' points in an inch. The unit of the pointsize return. */
-constexpr int kPointsPerInch = 72;
-/**
- * Minimum believable resolution. Used as a default if there is no other
- * information, as it is safer to under-estimate than over-estimate.
- */
-constexpr int kMinCredibleResolution = 70;
-/** Maximum believable resolution.  */
-constexpr int kMaxCredibleResolution = 2400;
-/**
- * Ratio between median blob size and likely resolution. Used to estimate
- * resolution when none is provided. This is basically 1/usual text size in
- * inches.  */
-constexpr int kResolutionEstimationFactor = 10;
-
-/**
- * Possible types for a POLY_BLOCK or ColPartition.
- * Must be kept in sync with kPBColors in polyblk.cpp and PTIs*Type functions
- * below, as well as kPolyBlockNames in layout_test.cc.
- * Used extensively by ColPartition, and POLY_BLOCK.
- */
-enum PolyBlockType {
-  PT_UNKNOWN,         // Type is not yet known. Keep as the first element.
-  PT_FLOWING_TEXT,    // Text that lives inside a column.
-  PT_HEADING_TEXT,    // Text that spans more than one column.
-  PT_PULLOUT_TEXT,    // Text that is in a cross-column pull-out region.
-  PT_EQUATION,        // Partition belonging to an equation region.
-  PT_INLINE_EQUATION, // Partition has inline equation.
-  PT_TABLE,           // Partition belonging to a table region.
-  PT_VERTICAL_TEXT,   // Text-line runs vertically.
-  PT_CAPTION_TEXT,    // Text that belongs to an image.
-  PT_FLOWING_IMAGE,   // Image that lives inside a column.
-  PT_HEADING_IMAGE,   // Image that spans more than one column.
-  PT_PULLOUT_IMAGE,   // Image that is in a cross-column pull-out region.
-  PT_HORZ_LINE,       // Horizontal Line.
-  PT_VERT_LINE,       // Vertical Line.
-  PT_NOISE,           // Lies outside of any column.
-  PT_COUNT
-};
-
-/** Returns true if PolyBlockType is of horizontal line type */
-inline bool PTIsLineType(PolyBlockType type) {
-  return type == PT_HORZ_LINE || type == PT_VERT_LINE;
-}
-/** Returns true if PolyBlockType is of image type */
-inline bool PTIsImageType(PolyBlockType type) {
-  return type == PT_FLOWING_IMAGE || type == PT_HEADING_IMAGE ||
-         type == PT_PULLOUT_IMAGE;
-}
-/** Returns true if PolyBlockType is of text type */
-inline bool PTIsTextType(PolyBlockType type) {
-  return type == PT_FLOWING_TEXT || type == PT_HEADING_TEXT ||
-         type == PT_PULLOUT_TEXT || type == PT_TABLE ||
-         type == PT_VERTICAL_TEXT || type == PT_CAPTION_TEXT ||
-         type == PT_INLINE_EQUATION;
-}
-// Returns true if PolyBlockType is of pullout(inter-column) type
-inline bool PTIsPulloutType(PolyBlockType type) {
-  return type == PT_PULLOUT_IMAGE || type == PT_PULLOUT_TEXT;
-}
-
-/**
- *  +------------------+  Orientation Example:
- *  | 1 Aaaa Aaaa Aaaa |  ====================
- *  | Aaa aa aaa aa    |  To left is a diagram of some (1) English and
- *  | aaaaaa A aa aaa. |  (2) Chinese text and a (3) photo credit.
- *  |                2 |
- *  |   #######  c c C |  Upright Latin characters are represented as A and a.
- *  |   #######  c c c |  '<' represents a latin character rotated
- *  | < #######  c c c |      anti-clockwise 90 degrees.
- *  | < #######  c   c |
- *  | < #######  .   c |  Upright Chinese characters are represented C and c.
- *  | 3 #######      c |
- *  +------------------+  NOTA BENE: enum values here should match goodoc.proto
-
- * If you orient your head so that "up" aligns with Orientation,
- * then the characters will appear "right side up" and readable.
- *
- * In the example above, both the English and Chinese paragraphs are oriented
- * so their "up" is the top of the page (page up).  The photo credit is read
- * with one's head turned leftward ("up" is to page left).
- *
- * The values of this enum match the convention of Tesseract's osdetect.h
-*/
-enum Orientation {
-  ORIENTATION_PAGE_UP = 0,
-  ORIENTATION_PAGE_RIGHT = 1,
-  ORIENTATION_PAGE_DOWN = 2,
-  ORIENTATION_PAGE_LEFT = 3,
-};
-
-/**
- * The grapheme clusters within a line of text are laid out logically
- * in this direction, judged when looking at the text line rotated so that
- * its Orientation is "page up".
- *
- * For English text, the writing direction is left-to-right.  For the
- * Chinese text in the above example, the writing direction is top-to-bottom.
- */
-enum WritingDirection {
-  WRITING_DIRECTION_LEFT_TO_RIGHT = 0,
-  WRITING_DIRECTION_RIGHT_TO_LEFT = 1,
-  WRITING_DIRECTION_TOP_TO_BOTTOM = 2,
-};
-
-/**
- * The text lines are read in the given sequence.
- *
- * In English, the order is top-to-bottom.
- * In Chinese, vertical text lines are read right-to-left.  Mongolian is
- * written in vertical columns top to bottom like Chinese, but the lines
- * order left-to right.
- *
- * Note that only some combinations make sense.  For example,
- * WRITING_DIRECTION_LEFT_TO_RIGHT implies TEXTLINE_ORDER_TOP_TO_BOTTOM
- */
-enum TextlineOrder {
-  TEXTLINE_ORDER_LEFT_TO_RIGHT = 0,
-  TEXTLINE_ORDER_RIGHT_TO_LEFT = 1,
-  TEXTLINE_ORDER_TOP_TO_BOTTOM = 2,
-};
-
-/**
- * Possible modes for page layout analysis. These *must* be kept in order
- * of decreasing amount of layout analysis to be done, except for OSD_ONLY,
- * so that the inequality test macros below work.
- */
-enum PageSegMode {
-  PSM_OSD_ONLY = 0,      ///< Orientation and script detection only.
-  PSM_AUTO_OSD = 1,      ///< Automatic page segmentation with orientation and
-                         ///< script detection. (OSD)
-  PSM_AUTO_ONLY = 2,     ///< Automatic page segmentation, but no OSD, or OCR.
-  PSM_AUTO = 3,          ///< Fully automatic page segmentation, but no OSD.
-  PSM_SINGLE_COLUMN = 4, ///< Assume a single column of text of variable sizes.
-  PSM_SINGLE_BLOCK_VERT_TEXT = 5, ///< Assume a single uniform block of
-                                  ///< vertically aligned text.
-  PSM_SINGLE_BLOCK = 6, ///< Assume a single uniform block of text. (Default.)
-  PSM_SINGLE_LINE = 7,  ///< Treat the image as a single text line.
-  PSM_SINGLE_WORD = 8,  ///< Treat the image as a single word.
-  PSM_CIRCLE_WORD = 9,  ///< Treat the image as a single word in a circle.
-  PSM_SINGLE_CHAR = 10, ///< Treat the image as a single character.
-  PSM_SPARSE_TEXT =
-      11, ///< Find as much text as possible in no particular order.
-  PSM_SPARSE_TEXT_OSD = 12, ///< Sparse text with orientation and script det.
-  PSM_RAW_LINE = 13, ///< Treat the image as a single text line, bypassing
-                     ///< hacks that are Tesseract-specific.
-
-  PSM_COUNT ///< Number of enum entries.
-};
-
-/**
- * Inline functions that act on a PageSegMode to determine whether components of
- * layout analysis are enabled.
- * *Depend critically on the order of elements of PageSegMode.*
- * NOTE that arg is an int for compatibility with INT_PARAM.
- */
-inline bool PSM_OSD_ENABLED(int pageseg_mode) {
-  return pageseg_mode <= PSM_AUTO_OSD || pageseg_mode == PSM_SPARSE_TEXT_OSD;
-}
-inline bool PSM_ORIENTATION_ENABLED(int pageseg_mode) {
-  return pageseg_mode <= PSM_AUTO || pageseg_mode == PSM_SPARSE_TEXT_OSD;
-}
-inline bool PSM_COL_FIND_ENABLED(int pageseg_mode) {
-  return pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_AUTO;
-}
-inline bool PSM_SPARSE(int pageseg_mode) {
-  return pageseg_mode == PSM_SPARSE_TEXT || pageseg_mode == PSM_SPARSE_TEXT_OSD;
-}
-inline bool PSM_BLOCK_FIND_ENABLED(int pageseg_mode) {
-  return pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_SINGLE_COLUMN;
-}
-inline bool PSM_LINE_FIND_ENABLED(int pageseg_mode) {
-  return pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_SINGLE_BLOCK;
-}
-inline bool PSM_WORD_FIND_ENABLED(int pageseg_mode) {
-  return (pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_SINGLE_LINE) ||
-         pageseg_mode == PSM_SPARSE_TEXT || pageseg_mode == PSM_SPARSE_TEXT_OSD;
-}
-
-/**
- * enum of the elements of the page hierarchy, used in ResultIterator
- * to provide functions that operate on each level without having to
- * have 5x as many functions.
- */
-enum PageIteratorLevel {
-  RIL_BLOCK,    // Block of text/image/separator line.
-  RIL_PARA,     // Paragraph within a block.
-  RIL_TEXTLINE, // Line within a paragraph.
-  RIL_WORD,     // Word within a textline.
-  RIL_SYMBOL    // Symbol/character within a word.
-};
-
-/**
- * JUSTIFICATION_UNKNOWN
- *   The alignment is not clearly one of the other options.  This could happen
- *   for example if there are only one or two lines of text or the text looks
- *   like source code or poetry.
- *
- * NOTA BENE: Fully justified paragraphs (text aligned to both left and right
- *    margins) are marked by Tesseract with JUSTIFICATION_LEFT if their text
- *    is written with a left-to-right script and with JUSTIFICATION_RIGHT if
- *    their text is written in a right-to-left script.
- *
- * Interpretation for text read in vertical lines:
- *   "Left" is wherever the starting reading position is.
- *
- * JUSTIFICATION_LEFT
- *   Each line, except possibly the first, is flush to the same left tab stop.
- *
- * JUSTIFICATION_CENTER
- *   The text lines of the paragraph are centered about a line going
- *   down through their middle of the text lines.
- *
- * JUSTIFICATION_RIGHT
- *   Each line, except possibly the first, is flush to the same right tab stop.
- */
-enum ParagraphJustification {
-  JUSTIFICATION_UNKNOWN,
-  JUSTIFICATION_LEFT,
-  JUSTIFICATION_CENTER,
-  JUSTIFICATION_RIGHT,
-};
-
-/**
- * When Tesseract/Cube is initialized we can choose to instantiate/load/run
- * only the Tesseract part, only the Cube part or both along with the combiner.
- * The preference of which engine to use is stored in tessedit_ocr_engine_mode.
- *
- * ATTENTION: When modifying this enum, please make sure to make the
- * appropriate changes to all the enums mirroring it (e.g. OCREngine in
- * cityblock/workflow/detection/detection_storage.proto). Such enums will
- * mention the connection to OcrEngineMode in the comments.
- */
-enum OcrEngineMode {
-  OEM_TESSERACT_ONLY,          // Run Tesseract only - fastest; deprecated
-  OEM_LSTM_ONLY,               // Run just the LSTM line recognizer.
-  OEM_TESSERACT_LSTM_COMBINED, // Run the LSTM recognizer, but allow fallback
-                               // to Tesseract when things get difficult.
-                               // deprecated
-  OEM_DEFAULT,                 // Specify this mode when calling init_*(),
-                               // to indicate that any of the above modes
-                               // should be automatically inferred from the
-                               // variables in the language-specific config,
-                               // command-line configs, or if not specified
-                               // in any of the above should be set to the
-                               // default OEM_TESSERACT_ONLY.
-  OEM_COUNT                    // Number of OEMs
-};
-
-} // namespace tesseract.
-
-#endif // TESSERACT_CCSTRUCT_PUBLICTYPES_H_
--- a/third_party/ocr/tesseract-ocr/uos/loongarch64/include/tesseract/renderer.h
+++ b/third_party/ocr/tesseract-ocr/uos/loongarch64/include/tesseract/renderer.h
@ -1,311 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// File:        renderer.h
-// Description: Rendering interface to inject into TessBaseAPI
-//
-// (C) Copyright 2011, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef TESSERACT_API_RENDERER_H_
-#define TESSERACT_API_RENDERER_H_
-
-#include "export.h"
-
-// To avoid collision with other typenames include the ABSOLUTE MINIMUM
-// complexity of includes here. Use forward declarations wherever possible
-// and hide includes of complex types in baseapi.cpp.
-#include <cstdint>
-#include <string> // for std::string
-#include <vector> // for std::vector
-
-struct Pix;
-
-namespace tesseract {
-
-class TessBaseAPI;
-
-/**
- * Interface for rendering tesseract results into a document, such as text,
- * HOCR or pdf. This class is abstract. Specific classes handle individual
- * formats. This interface is then used to inject the renderer class into
- * tesseract when processing images.
- *
- * For simplicity implementing this with tesseract version 3.01,
- * the renderer contains document state that is cleared from document
- * to document just as the TessBaseAPI is. This way the base API can just
- * delegate its rendering functionality to injected renderers, and the
- * renderers can manage the associated state needed for the specific formats
- * in addition to the heuristics for producing it.
- */
-class TESS_API TessResultRenderer {
-public:
-  virtual ~TessResultRenderer();
-
-  // Takes ownership of pointer so must be new'd instance.
-  // Renderers aren't ordered, but appends the sequences of next parameter
-  // and existing next(). The renderers should be unique across both lists.
-  void insert(TessResultRenderer *next);
-
-  // Returns the next renderer or nullptr.
-  TessResultRenderer *next() {
-    return next_;
-  }
-
-  /**
-   * Starts a new document with the given title.
-   * This clears the contents of the output data.
-   * Title should use UTF-8 encoding.
-   */
-  bool BeginDocument(const char *title);
-
-  /**
-   * Adds the recognized text from the source image to the current document.
-   * Invalid if BeginDocument not yet called.
-   *
-   * Note that this API is a bit weird but is designed to fit into the
-   * current TessBaseAPI implementation where the api has lots of state
-   * information that we might want to add in.
-   */
-  bool AddImage(TessBaseAPI *api);
-
-  /**
-   * Finishes the document and finalizes the output data
-   * Invalid if BeginDocument not yet called.
-   */
-  bool EndDocument();
-
-  const char *file_extension() const {
-    return file_extension_;
-  }
-  const char *title() const {
-    return title_.c_str();
-  }
-
-  // Is everything fine? Otherwise something went wrong.
-  bool happy() const {
-    return happy_;
-  }
-
-  /**
-   * Returns the index of the last image given to AddImage
-   * (i.e. images are incremented whether the image succeeded or not)
-   *
-   * This is always defined. It means either the number of the
-   * current image, the last image ended, or in the completed document
-   * depending on when in the document lifecycle you are looking at it.
-   * Will return -1 if a document was never started.
-   */
-  int imagenum() const {
-    return imagenum_;
-  }
-
-protected:
-  /**
-   * Called by concrete classes.
-   *
-   * outputbase is the name of the output file excluding
-   * extension. For example, "/path/to/chocolate-chip-cookie-recipe"
-   *
-   * extension indicates the file extension to be used for output
-   * files. For example "pdf" will produce a .pdf file, and "hocr"
-   * will produce .hocr files.
-   */
-  TessResultRenderer(const char *outputbase, const char *extension);
-
-  // Hook for specialized handling in BeginDocument()
-  virtual bool BeginDocumentHandler();
-
-  // This must be overridden to render the OCR'd results
-  virtual bool AddImageHandler(TessBaseAPI *api) = 0;
-
-  // Hook for specialized handling in EndDocument()
-  virtual bool EndDocumentHandler();
-
-  // Renderers can call this to append '\0' terminated strings into
-  // the output string returned by GetOutput.
-  // This method will grow the output buffer if needed.
-  void AppendString(const char *s);
-
-  // Renderers can call this to append binary byte sequences into
-  // the output string returned by GetOutput. Note that s is not necessarily
-  // '\0' terminated (and can contain '\0' within it).
-  // This method will grow the output buffer if needed.
-  void AppendData(const char *s, int len);
-
-private:
-  TessResultRenderer *next_;   // Can link multiple renderers together
-  FILE *fout_;                 // output file pointer
-  const char *file_extension_; // standard extension for generated output
-  std::string title_;          // title of document being rendered
-  int imagenum_;               // index of last image added
-  bool happy_;                 // I get grumpy when the disk fills up, etc.
-};
-
-/**
- * Renders tesseract output into a plain UTF-8 text string
- */
-class TESS_API TessTextRenderer : public TessResultRenderer {
-public:
-  explicit TessTextRenderer(const char *outputbase);
-
-protected:
-  bool AddImageHandler(TessBaseAPI *api) override;
-};
-
-/**
- * Renders tesseract output into an hocr text string
- */
-class TESS_API TessHOcrRenderer : public TessResultRenderer {
-public:
-  explicit TessHOcrRenderer(const char *outputbase, bool font_info);
-  explicit TessHOcrRenderer(const char *outputbase);
-
-protected:
-  bool BeginDocumentHandler() override;
-  bool AddImageHandler(TessBaseAPI *api) override;
-  bool EndDocumentHandler() override;
-
-private:
-  bool font_info_; // whether to print font information
-};
-
-/**
- * Renders tesseract output into an alto text string
- */
-class TESS_API TessAltoRenderer : public TessResultRenderer {
-public:
-  explicit TessAltoRenderer(const char *outputbase);
-
-protected:
-  bool BeginDocumentHandler() override;
-  bool AddImageHandler(TessBaseAPI *api) override;
-  bool EndDocumentHandler() override;
-
-private:
-  bool begin_document;
-};
-
-/**
- * Renders Tesseract output into a TSV string
- */
-class TESS_API TessTsvRenderer : public TessResultRenderer {
-public:
-  explicit TessTsvRenderer(const char *outputbase, bool font_info);
-  explicit TessTsvRenderer(const char *outputbase);
-
-protected:
-  bool BeginDocumentHandler() override;
-  bool AddImageHandler(TessBaseAPI *api) override;
-  bool EndDocumentHandler() override;
-
-private:
-  bool font_info_; // whether to print font information
-};
-
-/**
- * Renders tesseract output into searchable PDF
- */
-class TESS_API TessPDFRenderer : public TessResultRenderer {
-public:
-  // datadir is the location of the TESSDATA. We need it because
-  // we load a custom PDF font from this location.
-  TessPDFRenderer(const char *outputbase, const char *datadir,
-                  bool textonly = false);
-
-protected:
-  bool BeginDocumentHandler() override;
-  bool AddImageHandler(TessBaseAPI *api) override;
-  bool EndDocumentHandler() override;
-
-private:
-  // We don't want to have every image in memory at once,
-  // so we store some metadata as we go along producing
-  // PDFs one page at a time. At the end, that metadata is
-  // used to make everything that isn't easily handled in a
-  // streaming fashion.
-  long int obj_;                  // counter for PDF objects
-  std::vector<uint64_t> offsets_; // offset of every PDF object in bytes
-  std::vector<long int> pages_;   // object number for every /Page object
-  std::string datadir_;           // where to find the custom font
-  bool textonly_;                 // skip images if set
-  // Bookkeeping only. DIY = Do It Yourself.
-  void AppendPDFObjectDIY(size_t objectsize);
-  // Bookkeeping + emit data.
-  void AppendPDFObject(const char *data);
-  // Create the /Contents object for an entire page.
-  char *GetPDFTextObjects(TessBaseAPI *api, double width, double height);
-  // Turn an image into a PDF object. Only transcode if we have to.
-  static bool imageToPDFObj(Pix *pix, const char *filename, long int objnum,
-                            char **pdf_object, long int *pdf_object_size,
-                            int jpg_quality);
-};
-
-/**
- * Renders tesseract output into a plain UTF-8 text string
- */
-class TESS_API TessUnlvRenderer : public TessResultRenderer {
-public:
-  explicit TessUnlvRenderer(const char *outputbase);
-
-protected:
-  bool AddImageHandler(TessBaseAPI *api) override;
-};
-
-/**
- * Renders tesseract output into a plain UTF-8 text string for LSTMBox
- */
-class TESS_API TessLSTMBoxRenderer : public TessResultRenderer {
-public:
-  explicit TessLSTMBoxRenderer(const char *outputbase);
-
-protected:
-  bool AddImageHandler(TessBaseAPI *api) override;
-};
-
-/**
- * Renders tesseract output into a plain UTF-8 text string
- */
-class TESS_API TessBoxTextRenderer : public TessResultRenderer {
-public:
-  explicit TessBoxTextRenderer(const char *outputbase);
-
-protected:
-  bool AddImageHandler(TessBaseAPI *api) override;
-};
-
-/**
- * Renders tesseract output into a plain UTF-8 text string in WordStr format
- */
-class TESS_API TessWordStrBoxRenderer : public TessResultRenderer {
-public:
-  explicit TessWordStrBoxRenderer(const char *outputbase);
-
-protected:
-  bool AddImageHandler(TessBaseAPI *api) override;
-};
-
-#ifndef DISABLED_LEGACY_ENGINE
-
-/**
- * Renders tesseract output into an osd text string
- */
-class TESS_API TessOsdRenderer : public TessResultRenderer {
-public:
-  explicit TessOsdRenderer(const char *outputbase);
-
-protected:
-  bool AddImageHandler(TessBaseAPI *api) override;
-};
-
-#endif // ndef DISABLED_LEGACY_ENGINE
-
-} // namespace tesseract.
-
-#endif // TESSERACT_API_RENDERER_H_
--- a/third_party/ocr/tesseract-ocr/uos/loongarch64/include/tesseract/resultiterator.h
+++ b/third_party/ocr/tesseract-ocr/uos/loongarch64/include/tesseract/resultiterator.h
@ -1,250 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// File:        resultiterator.h
-// Description: Iterator for tesseract results that is capable of
-//              iterating in proper reading order over Bi Directional
-//              (e.g. mixed Hebrew and English) text.
-// Author:      David Eger
-//
-// (C) Copyright 2011, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef TESSERACT_CCMAIN_RESULT_ITERATOR_H_
-#define TESSERACT_CCMAIN_RESULT_ITERATOR_H_
-
-#include "export.h"            // for TESS_API, TESS_LOCAL
-#include "ltrresultiterator.h" // for LTRResultIterator
-#include "publictypes.h"       // for PageIteratorLevel
-#include "unichar.h"           // for StrongScriptDirection
-
-#include <set>    // for std::pair
-#include <vector> // for std::vector
-
-namespace tesseract {
-
-class TESS_API ResultIterator : public LTRResultIterator {
-public:
-  static ResultIterator *StartOfParagraph(const LTRResultIterator &resit);
-
-  /**
-   * ResultIterator is copy constructible!
-   * The default copy constructor works just fine for us.
-   */
-  ~ResultIterator() override = default;
-
-  // ============= Moving around within the page ============.
-  /**
-   * Moves the iterator to point to the start of the page to begin
-   * an iteration.
-   */
-  void Begin() override;
-
-  /**
-   * Moves to the start of the next object at the given level in the
-   * page hierarchy in the appropriate reading order and returns false if
-   * the end of the page was reached.
-   * NOTE that RIL_SYMBOL will skip non-text blocks, but all other
-   * PageIteratorLevel level values will visit each non-text block once.
-   * Think of non text blocks as containing a single para, with a single line,
-   * with a single imaginary word.
-   * Calls to Next with different levels may be freely intermixed.
-   * This function iterates words in right-to-left scripts correctly, if
-   * the appropriate language has been loaded into Tesseract.
-   */
-  bool Next(PageIteratorLevel level) override;
-
-  /**
-   * IsAtBeginningOf() returns whether we're at the logical beginning of the
-   * given level.  (as opposed to ResultIterator's left-to-right top-to-bottom
-   * order).  Otherwise, this acts the same as PageIterator::IsAtBeginningOf().
-   * For a full description, see pageiterator.h
-   */
-  bool IsAtBeginningOf(PageIteratorLevel level) const override;
-
-  /**
-   * Implement PageIterator's IsAtFinalElement correctly in a BiDi context.
-   * For instance, IsAtFinalElement(RIL_PARA, RIL_WORD) returns whether we
-   * point at the last word in a paragraph.  See PageIterator for full comment.
-   */
-  bool IsAtFinalElement(PageIteratorLevel level,
-                        PageIteratorLevel element) const override;
-
-  // ============= Functions that refer to words only ============.
-  // Returns the number of blanks before the current word.
-  int BlanksBeforeWord() const;
-
-  // ============= Accessing data ==============.
-
-  /**
-   * Returns the null terminated UTF-8 encoded text string for the current
-   * object at the given level. Use delete [] to free after use.
-   */
-  virtual char *GetUTF8Text(PageIteratorLevel level) const;
-
-  /**
-   * Returns the LSTM choices for every LSTM timestep for the current word.
-   */
-  virtual std::vector<std::vector<std::vector<std::pair<const char *, float>>>>
-      *GetRawLSTMTimesteps() const;
-  virtual std::vector<std::vector<std::pair<const char *, float>>>
-      *GetBestLSTMSymbolChoices() const;
-
-  /**
-   * Return whether the current paragraph's dominant reading direction
-   * is left-to-right (as opposed to right-to-left).
-   */
-  bool ParagraphIsLtr() const;
-
-  // ============= Exposed only for testing =============.
-
-  /**
-   * Yields the reading order as a sequence of indices and (optional)
-   * meta-marks for a set of words (given left-to-right).
-   * The meta marks are passed as negative values:
-   *   kMinorRunStart  Start of minor direction text.
-   *   kMinorRunEnd    End of minor direction text.
-   *   kComplexWord    The next indexed word contains both left-to-right and
-   *                    right-to-left characters and was treated as neutral.
-   *
-   * For example, suppose we have five words in a text line,
-   * indexed [0,1,2,3,4] from the leftmost side of the text line.
-   * The following are all believable reading_orders:
-   *
-   * Left-to-Right (in ltr paragraph):
-   *     { 0, 1, 2, 3, 4 }
-   * Left-to-Right (in rtl paragraph):
-   *     { kMinorRunStart, 0, 1, 2, 3, 4, kMinorRunEnd }
-   * Right-to-Left (in rtl paragraph):
-   *     { 4, 3, 2, 1, 0 }
-   * Left-to-Right except for an RTL phrase in words 2, 3 in an ltr paragraph:
-   *     { 0, 1, kMinorRunStart, 3, 2, kMinorRunEnd, 4 }
-   */
-  static void CalculateTextlineOrder(
-      bool paragraph_is_ltr,
-      const std::vector<StrongScriptDirection> &word_dirs,
-      std::vector<int> *reading_order);
-
-  static const int kMinorRunStart;
-  static const int kMinorRunEnd;
-  static const int kComplexWord;
-
-protected:
-  /**
-   * We presume the data associated with the given iterator will outlive us.
-   * NB: This is private because it does something that is non-obvious:
-   *   it resets to the beginning of the paragraph instead of staying wherever
-   *   resit might have pointed.
-   */
-  explicit ResultIterator(const LTRResultIterator &resit);
-
-private:
-  /**
-   * Calculates the current paragraph's dominant writing direction.
-   * Typically, members should use current_paragraph_ltr_ instead.
-   */
-  bool CurrentParagraphIsLtr() const;
-
-  /**
-   * Returns word indices as measured from resit->RestartRow() = index 0
-   * for the reading order of words within a textline given an iterator
-   * into the middle of the text line.
-   * In addition to non-negative word indices, the following negative values
-   * may be inserted:
-   *   kMinorRunStart  Start of minor direction text.
-   *   kMinorRunEnd    End of minor direction text.
-   *   kComplexWord    The previous word contains both left-to-right and
-   *                   right-to-left characters and was treated as neutral.
-   */
-  void CalculateTextlineOrder(bool paragraph_is_ltr,
-                              const LTRResultIterator &resit,
-                              std::vector<int> *indices) const;
-  /** Same as above, but the caller's ssd gets filled in if ssd != nullptr. */
-  void CalculateTextlineOrder(bool paragraph_is_ltr,
-                              const LTRResultIterator &resit,
-                              std::vector<StrongScriptDirection> *ssd,
-                              std::vector<int> *indices) const;
-
-  /**
-   * What is the index of the current word in a strict left-to-right reading
-   * of the row?
-   */
-  int LTRWordIndex() const;
-
-  /**
-   * Given an iterator pointing at a word, returns the logical reading order
-   * of blob indices for the word.
-   */
-  void CalculateBlobOrder(std::vector<int> *blob_indices) const;
-
-  /** Precondition: current_paragraph_is_ltr_ is set. */
-  void MoveToLogicalStartOfTextline();
-
-  /**
-   * Precondition: current_paragraph_is_ltr_ and in_minor_direction_
-   * are set.
-   */
-  void MoveToLogicalStartOfWord();
-
-  /** Are we pointing at the final (reading order) symbol of the word? */
-  bool IsAtFinalSymbolOfWord() const;
-
-  /** Are we pointing at the first (reading order) symbol of the word? */
-  bool IsAtFirstSymbolOfWord() const;
-
-  /**
-   * Append any extra marks that should be appended to this word when printed.
-   * Mostly, these are Unicode BiDi control characters.
-   */
-  void AppendSuffixMarks(std::string *text) const;
-
-  /** Appends the current word in reading order to the given buffer.*/
-  void AppendUTF8WordText(std::string *text) const;
-
-  /**
-   * Appends the text of the current text line, *assuming this iterator is
-   * positioned at the beginning of the text line*  This function
-   * updates the iterator to point to the first position past the text line.
-   * Each textline is terminated in a single newline character.
-   * If the textline ends a paragraph, it gets a second terminal newline.
-   */
-  void IterateAndAppendUTF8TextlineText(std::string *text);
-
-  /**
-   * Appends the text of the current paragraph in reading order
-   * to the given buffer.
-   * Each textline is terminated in a single newline character, and the
-   * paragraph gets an extra newline at the end.
-   */
-  void AppendUTF8ParagraphText(std::string *text) const;
-
-  /** Returns whether the bidi_debug flag is set to at least min_level. */
-  bool BidiDebug(int min_level) const;
-
-  bool current_paragraph_is_ltr_;
-
-  /**
-   * Is the currently pointed-at character at the beginning of
-   * a minor-direction run?
-   */
-  bool at_beginning_of_minor_run_;
-
-  /** Is the currently pointed-at character in a minor-direction sequence? */
-  bool in_minor_direction_;
-
-  /**
-   * Should detected inter-word spaces be preserved, or "compressed" to a single
-   * space character (default behavior).
-   */
-  bool preserve_interword_spaces_;
-};
-
-} // namespace tesseract.
-
-#endif // TESSERACT_CCMAIN_RESULT_ITERATOR_H_
--- a/third_party/ocr/tesseract-ocr/uos/loongarch64/include/tesseract/unichar.h
+++ b/third_party/ocr/tesseract-ocr/uos/loongarch64/include/tesseract/unichar.h
@ -1,174 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// File:        unichar.h
-// Description: Unicode character/ligature class.
-// Author:      Ray Smith
-//
-// (C) Copyright 2006, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef TESSERACT_CCUTIL_UNICHAR_H_
-#define TESSERACT_CCUTIL_UNICHAR_H_
-
-#include "export.h"
-
-#include <memory.h>
-#include <cstring>
-#include <string>
-#include <vector>
-
-namespace tesseract {
-
-// Maximum number of characters that can be stored in a UNICHAR. Must be
-// at least 4. Must not exceed 31 without changing the coding of length.
-#define UNICHAR_LEN 30
-
-// A UNICHAR_ID is the unique id of a unichar.
-using UNICHAR_ID = int;
-
-// A variable to indicate an invalid or uninitialized unichar id.
-static const int INVALID_UNICHAR_ID = -1;
-// A special unichar that corresponds to INVALID_UNICHAR_ID.
-static const char INVALID_UNICHAR[] = "__INVALID_UNICHAR__";
-
-enum StrongScriptDirection {
-  DIR_NEUTRAL = 0,       // Text contains only neutral characters.
-  DIR_LEFT_TO_RIGHT = 1, // Text contains no Right-to-Left characters.
-  DIR_RIGHT_TO_LEFT = 2, // Text contains no Left-to-Right characters.
-  DIR_MIX = 3,           // Text contains a mixture of left-to-right
-                         // and right-to-left characters.
-};
-
-using char32 = signed int;
-
-// The UNICHAR class holds a single classification result. This may be
-// a single Unicode character (stored as between 1 and 4 utf8 bytes) or
-// multiple Unicode characters representing the NFKC expansion of a ligature
-// such as fi, ffl etc. These are also stored as utf8.
-class TESS_API UNICHAR {
-public:
-  UNICHAR() {
-    memset(chars, 0, UNICHAR_LEN);
-  }
-
-  // Construct from a utf8 string. If len<0 then the string is null terminated.
-  // If the string is too long to fit in the UNICHAR then it takes only what
-  // will fit.
-  UNICHAR(const char *utf8_str, int len);
-
-  // Construct from a single UCS4 character.
-  explicit UNICHAR(int unicode);
-
-  // Default copy constructor and operator= are OK.
-
-  // Get the first character as UCS-4.
-  int first_uni() const;
-
-  // Get the UTF8 byte length: last byte of chars if in [0, UNICHAR_LEN), else UNICHAR_LEN.
-  int utf8_len() const {
-    int len = chars[UNICHAR_LEN - 1];
-    return len >= 0 && len < UNICHAR_LEN ? len : UNICHAR_LEN;
-  }
-
-  // Get the internal UTF8 bytes; not guaranteed null-terminated, see utf8_len().
-  const char *utf8() const {
-    return chars;
-  }
-
-  // Get a terminated UTF8 string: Must delete[] it after use.
-  char *utf8_str() const;
-
-  // Get the number of bytes in the first character of the given utf8 string.
-  static int utf8_step(const char *utf8_str);
-
-  // A class to simplify iterating over and accessing elements of a UTF8
-  // string. Note that unlike the UNICHAR class, const_iterator does NOT COPY or
-  // take ownership of the underlying byte array. It also does not permit
-  // modification of the array (as the name suggests).
-  //
-  // Example:
-  //   for (UNICHAR::const_iterator it = UNICHAR::begin(str, str_len);
-  //        it != UNICHAR::end(str, str_len);
-  //        ++it) {
-  //     printf("UCS-4 symbol code = %d\n", *it);
-  //     char buf[5];
-  //     int char_len = it.get_utf8(buf); buf[char_len] = '\0';
-  //     printf("Char = %s\n", buf);
-  //   }
-  class TESS_API const_iterator {
-    using CI = const_iterator;
-
-  public:
-    // Step to the next UTF8 character.
-    // If the current position is at an illegal UTF8 character, then print an
-    // error message and step by one byte. If the current position is at a
-    // null byte (string terminator), don't step past it.
-    const_iterator &operator++();
-
-    // Return the UCS-4 value at the current position.
-    // If the current position is at an illegal UTF8 value, return a single
-    // space character.
-    int operator*() const;
-
-    // Store the UTF-8 encoding of the current codepoint into buf, which must be
-    // at least 4 bytes long. Return the number of bytes written.
-    // If the current position is at an illegal UTF8 value, writes a single
-    // space character and returns 1.
-    // Note that this method does not null-terminate the buffer.
-    int get_utf8(char *buf) const;
-    // Returns the number of bytes of the current codepoint. Returns 1 if the
-    // current position is at an illegal UTF8 value.
-    int utf8_len() const;
-    // Returns true if the UTF-8 encoding at the current position is legal.
-    bool is_legal() const;
-
-    // Return the pointer into the string at the current position.
-    const char *utf8_data() const {
-      return it_;
-    }
-
-    // Iterator equality operators.
-    friend bool operator==(const CI &lhs, const CI &rhs) {
-      return lhs.it_ == rhs.it_;
-    }
-    friend bool operator!=(const CI &lhs, const CI &rhs) {
-      return !(lhs == rhs);
-    }
-
-  private:
-    friend class UNICHAR;
-    explicit const_iterator(const char *it) : it_(it) {}
-
-    const char *it_; // Pointer into the string.
-  };
-
-  // Create a start/end iterator pointing to a string. Note that these methods
-  // are static and do NOT create a copy or take ownership of the underlying
-  // array.
-  static const_iterator begin(const char *utf8_str, int byte_length);
-  static const_iterator end(const char *utf8_str, int byte_length);
-
-  // Converts a utf-8 string to a vector of unicodes.
-  // Returns an empty vector if the input contains invalid UTF-8.
-  static std::vector<char32> UTF8ToUTF32(const char *utf8_str);
-  // Converts a vector of unicodes to a utf8 string.
-  // Returns an empty string if the input contains an invalid unicode.
-  static std::string UTF32ToUTF8(const std::vector<char32> &str32);
-
-private:
-  // A UTF-8 representation of 1 or more Unicode characters.
-  // The last element (chars[UNICHAR_LEN - 1]) is a length if
-  // its value < UNICHAR_LEN, otherwise it is a genuine character.
-  char chars[UNICHAR_LEN]{};
-};
-
-} // namespace tesseract
-
-#endif // TESSERACT_CCUTIL_UNICHAR_H_
--- a/third_party/ocr/tesseract-ocr/uos/loongarch64/include/tesseract/version.h
+++ b/third_party/ocr/tesseract-ocr/uos/loongarch64/include/tesseract/version.h
@ -1,34 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// File:        version.h
-// Description: Version information
-//
-// (C) Copyright 2018, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef TESSERACT_API_VERSION_H_
-#define TESSERACT_API_VERSION_H_
-
-// clang-format off
-
-#define TESSERACT_MAJOR_VERSION @GENERIC_MAJOR_VERSION@
-#define TESSERACT_MINOR_VERSION @GENERIC_MINOR_VERSION@
-#define TESSERACT_MICRO_VERSION @GENERIC_MICRO_VERSION@
-
-#define TESSERACT_VERSION          \
-  (TESSERACT_MAJOR_VERSION << 16 | \
-   TESSERACT_MINOR_VERSION <<  8 | \
-   TESSERACT_MICRO_VERSION)
-
-#define TESSERACT_VERSION_STR "@PACKAGE_VERSION@"
-
-// clang-format on
-
-#endif // TESSERACT_API_VERSION_H_
--- a/third_party/ocr/tesseract-ocr/uos/mips64/include/tesseract/baseapi.h
+++ b/third_party/ocr/tesseract-ocr/uos/mips64/include/tesseract/baseapi.h
@ -1,812 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// File:        baseapi.h
-// Description: Simple API for calling tesseract.
-// Author:      Ray Smith
-//
-// (C) Copyright 2006, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef TESSERACT_API_BASEAPI_H_
-#define TESSERACT_API_BASEAPI_H_
-
-#ifdef HAVE_CONFIG_H
-#  include "config_auto.h" // DISABLED_LEGACY_ENGINE
-#endif
-
-#include "export.h"
-#include "pageiterator.h"
-#include "publictypes.h"
-#include "resultiterator.h"
-#include "unichar.h"
-
-#include "version.h"
-
-#include <cstdio>
-#include <vector> // for std::vector
-
-struct Pix;
-struct Pixa;
-struct Boxa;
-
-namespace tesseract {
-
-class PAGE_RES;
-class ParagraphModel;
-class BLOCK_LIST;
-class ETEXT_DESC;
-struct OSResults;
-class UNICHARSET;
-
-class Dawg;
-class Dict;
-class EquationDetect;
-class PageIterator;
-class ImageThresholder;
-class LTRResultIterator;
-class ResultIterator;
-class MutableIterator;
-class TessResultRenderer;
-class Tesseract;
-
-// Function to read a std::vector<char> from a whole file.
-// Returns false on failure.
-using FileReader = bool (*)(const char *filename, std::vector<char> *data);
-
-using DictFunc = int (Dict::*)(void *, const UNICHARSET &, UNICHAR_ID,
-                               bool) const;
-using ProbabilityInContextFunc = double (Dict::*)(const char *, const char *,
-                                                  int, const char *, int);
-
-/**
- * Base class for all tesseract APIs.
- * Specific classes can add ability to work on different inputs or produce
- * different outputs.
- * This class is mostly an interface layer on top of the Tesseract instance
- * class to hide the data types so that users of this class don't have to
- * include any other Tesseract headers.
- */
-class TESS_API TessBaseAPI {
-public:
-  TessBaseAPI();
-  virtual ~TessBaseAPI();
-  // Copy constructor and assignment operator are currently unsupported.
-  TessBaseAPI(TessBaseAPI const &) = delete;
-  TessBaseAPI &operator=(TessBaseAPI const &) = delete;
-
-  /**
-   * Returns the version identifier as a static string. Do not delete.
-   */
-  static const char *Version();
-
-  /**
-   * If compiled with OpenCL AND an available OpenCL
-   * device is deemed faster than serial code, then
-   * "device" is populated with the cl_device_id
-   * and returns sizeof(cl_device_id)
-   * otherwise *device=nullptr and returns 0.
-   */
-  static size_t getOpenCLDevice(void **device);
-
-  /**
-   * Set the name of the input file. Needed for training and
-   * reading a UNLV zone file, and for searchable PDF output.
-   */
-  void SetInputName(const char *name);
-  /**
-   * These functions are required for searchable PDF output.
-   * We need our hands on the input file so that we can include
-   * it in the PDF without transcoding. If that is not possible,
-   * we need the original image. Finally, resolution metadata
-   * is stored in the PDF so we need that as well.
-   */
-  const char *GetInputName();
-  // Takes ownership of the input pix.
-  void SetInputImage(Pix *pix);
-  Pix *GetInputImage();
-  int GetSourceYResolution();
-  const char *GetDatapath();
-
-  /** Set the name of the bonus output files. Needed only for debugging. */
-  void SetOutputName(const char *name);
-
-  /**
-   * Set the value of an internal "parameter."
-   * Supply the name of the parameter and the value as a string, just as
-   * you would in a config file.
-   * Returns false if the name lookup failed.
-   * Eg SetVariable("tessedit_char_blacklist", "xyz"); to ignore x, y and z.
-   * Or SetVariable("classify_bln_numeric_mode", "1"); to set numeric-only mode.
-   * SetVariable may be used before Init, but settings will revert to
-   * defaults on End().
-   *
-   * Note: Must be called after Init(). Only works for non-init variables
-   * (init variables should be passed to Init()).
-   */
-  bool SetVariable(const char *name, const char *value);
-  bool SetDebugVariable(const char *name, const char *value);
-
-  /**
-   * Returns true if the parameter was found among Tesseract parameters.
-   * Fills in value with the value of the parameter.
-   */
-  bool GetIntVariable(const char *name, int *value) const;
-  bool GetBoolVariable(const char *name, bool *value) const;
-  bool GetDoubleVariable(const char *name, double *value) const;
-
-  /**
-   * Returns the pointer to the string that represents the value of the
-   * parameter if it was found among Tesseract parameters.
-   */
-  const char *GetStringVariable(const char *name) const;
-
-#ifndef DISABLED_LEGACY_ENGINE
-
-  /**
-   * Print Tesseract fonts table to the given file.
-   */
-  void PrintFontsTable(FILE *fp) const;
-
-#endif
-
-  /**
-   * Print Tesseract parameters to the given file.
-   */
-  void PrintVariables(FILE *fp) const;
-
-  /**
-   * Get value of named variable as a string, if it exists.
-   */
-  bool GetVariableAsString(const char *name, std::string *val) const;
-
-  /**
-   * Instances are now mostly thread-safe and totally independent,
-   * but some global parameters remain. Basically it is safe to use multiple
-   * TessBaseAPIs in different threads in parallel, UNLESS:
-   * you use SetVariable on some of the Params in classify and textord.
-   * If you do, then the effect will be to change it for all your instances.
-   *
-   * Start tesseract. Returns zero on success and -1 on failure.
-   * NOTE that the only members that may be called before Init are those
-   * listed above here in the class definition.
-   *
-   * The datapath must be the name of the tessdata directory.
-   * The language is (usually) an ISO 639-3 string or nullptr will default to
-   * eng. It is entirely safe (and eventually will be efficient too) to call
-   * Init multiple times on the same instance to change language, or just
-   * to reset the classifier.
-   * The language may be a string of the form [~]<lang>[+[~]<lang>]* indicating
-   * that multiple languages are to be loaded. Eg hin+eng will load Hindi and
-   * English. Languages may specify internally that they want to be loaded
-   * with one or more other languages, so the ~ sign is available to override
-   * that. Eg if hin were set to load eng by default, then hin+~eng would force
-   * loading only hin. The number of loaded languages is limited only by
-   * memory, with the caveat that loading additional languages will impact
-   * both speed and accuracy, as there is more work to do to decide on the
-   * applicable language, and there is more chance of hallucinating incorrect
-   * words.
-   * WARNING: On changing languages, all Tesseract parameters are reset
-   * back to their default values. (Which may vary between languages.)
-   * If you have a rare need to set a Variable that controls
-   * initialization for a second call to Init you should explicitly
-   * call End() and then use SetVariable before Init. This is only a very
-   * rare use case, since there are very few uses that require any parameters
-   * to be set before Init.
-   *
-   * If set_only_non_debug_params is true, only params that do not contain
-   * "debug" in the name will be set.
-   */
-  int Init(const char *datapath, const char *language, OcrEngineMode mode,
-           char **configs, int configs_size,
-           const std::vector<std::string> *vars_vec,
-           const std::vector<std::string> *vars_values,
-           bool set_only_non_debug_params);
-  int Init(const char *datapath, const char *language, OcrEngineMode oem) {
-    return Init(datapath, language, oem, nullptr, 0, nullptr, nullptr, false);
-  }
-  int Init(const char *datapath, const char *language) {
-    return Init(datapath, language, OEM_DEFAULT, nullptr, 0, nullptr, nullptr,
-                false);
-  }
-  // In-memory version reads the traineddata file directly from the given
-  // data[data_size] array, and/or reads data via a FileReader.
-  int Init(const char *data, int data_size, const char *language,
-           OcrEngineMode mode, char **configs, int configs_size,
-           const std::vector<std::string> *vars_vec,
-           const std::vector<std::string> *vars_values,
-           bool set_only_non_debug_params, FileReader reader);
-
-  /**
-   * Returns the languages string used in the last valid initialization.
-   * If the last initialization specified "deu+hin" then that will be
-   * returned. If hin loaded eng automatically as well, then that will
-   * not be included in this list. To find the languages actually
-   * loaded use GetLoadedLanguagesAsVector.
-   * The returned string should NOT be deleted.
-   */
-  const char *GetInitLanguagesAsString() const;
-
-  /**
-   * Returns the loaded languages in the vector of std::string.
-   * Includes all languages loaded by the last Init, including those loaded
-   * as dependencies of other loaded languages.
-   */
-  void GetLoadedLanguagesAsVector(std::vector<std::string> *langs) const;
-
-  /**
-   * Returns the available languages in the sorted vector of std::string.
-   */
-  void GetAvailableLanguagesAsVector(std::vector<std::string> *langs) const;
-
-  /**
-   * Init only for page layout analysis. Use only for calls to SetImage and
-   * AnalysePage. Calls that attempt recognition will generate an error.
-   */
-  void InitForAnalysePage();
-
-  /**
-   * Read a "config" file containing a set of param, value pairs.
-   * Searches the standard places: tessdata/configs, tessdata/tessconfigs
-   * and also accepts a relative or absolute path name.
-   * Note: only non-init params will be set (init params are set by Init()).
-   */
-  void ReadConfigFile(const char *filename);
-  /** Same as above, but only set debug params from the given config file. */
-  void ReadDebugConfigFile(const char *filename);
-
-  /**
-   * Set the current page segmentation mode. Defaults to PSM_SINGLE_BLOCK.
-   * The mode is stored as an IntParam so it can also be modified by
-   * ReadConfigFile or SetVariable("tessedit_pageseg_mode", mode as string).
-   */
-  void SetPageSegMode(PageSegMode mode);
-
-  /** Return the current page segmentation mode. */
-  PageSegMode GetPageSegMode() const;
-
-  /**
-   * Recognize a rectangle from an image and return the result as a string.
-   * May be called many times for a single Init.
-   * Currently has no error checking.
-   * Greyscale of 8 and color of 24 or 32 bits per pixel may be given.
-   * Palette color images will not work properly and must be converted to
-   * 24 bit.
-   * Binary images of 1 bit per pixel may also be given but they must be
-   * byte packed with the MSB of the first byte being the first pixel, and a
-   * 1 represents WHITE. For binary images set bytes_per_pixel=0.
-   * The recognized text is returned as a char* which is coded
-   * as UTF8 and must be freed with the delete [] operator.
-   *
-   * Note that TesseractRect is the simplified convenience interface.
-   * For advanced uses, use SetImage, (optionally) SetRectangle, Recognize,
-   * and one or more of the Get*Text functions below.
-   */
-  char *TesseractRect(const unsigned char *imagedata, int bytes_per_pixel,
-                      int bytes_per_line, int left, int top, int width,
-                      int height);
-
-  /**
-   * Call between pages or documents etc to free up memory and forget
-   * adaptive data.
-   */
-  void ClearAdaptiveClassifier();
-
-  /**
-   * @defgroup AdvancedAPI Advanced API
-   * The following methods break TesseractRect into pieces, so you can
-   * get hold of the thresholded image, get the text in different formats,
-   * get bounding boxes, confidences etc.
-   */
-  /* @{ */
-
-  /**
-   * Provide an image for Tesseract to recognize. Format is as
-   * TesseractRect above. Copies the image buffer and converts to Pix.
-   * SetImage clears all recognition results, and sets the rectangle to the
-   * full image, so it may be followed immediately by a GetUTF8Text, and it
-   * will automatically perform recognition.
-   */
-  void SetImage(const unsigned char *imagedata, int width, int height,
-                int bytes_per_pixel, int bytes_per_line);
-
-  /**
-   * Provide an image for Tesseract to recognize. As with SetImage above,
-   * Tesseract takes its own copy of the image, so it need not persist until
-   * after Recognize.
-   * Pix vs raw, which to use?
-   * Use Pix where possible. Tesseract uses Pix as its internal representation
-   * and it is therefore more efficient to provide a Pix directly.
-   */
-  void SetImage(Pix *pix);
-
-  /**
-   * Set the resolution of the source image in pixels per inch so font size
-   * information can be calculated in results.  Call this after SetImage().
-   */
-  void SetSourceResolution(int ppi);
-
-  /**
-   * Restrict recognition to a sub-rectangle of the image. Call after SetImage.
-   * Each SetRectangle clears the recognition results so multiple rectangles
-   * can be recognized with the same image.
-   */
-  void SetRectangle(int left, int top, int width, int height);
-
-  /**
-   * Get a copy of the internal thresholded image from Tesseract.
-   * Caller takes ownership of the Pix and must pixDestroy it.
-   * May be called any time after SetImage, or after TesseractRect.
-   */
-  Pix *GetThresholdedImage();
-
-  /**
-   * Get the result of page layout analysis as a leptonica-style
-   * Boxa, Pixa pair, in reading order.
-   * Can be called before or after Recognize.
-   */
-  Boxa *GetRegions(Pixa **pixa);
-
-  /**
-   * Get the textlines as a leptonica-style
-   * Boxa, Pixa pair, in reading order.
-   * Can be called before or after Recognize.
-   * If raw_image is true, then extract from the original image instead of the
-   * thresholded image and pad by raw_padding pixels.
-   * If blockids is not nullptr, the block-id of each line is also returned as
-   * an array of one element per line. delete [] after use. If paraids is not
-   * nullptr, the paragraph-id of each line within its block is also returned as
-   * an array of one element per line. delete [] after use.
-   */
-  Boxa *GetTextlines(bool raw_image, int raw_padding, Pixa **pixa,
-                     int **blockids, int **paraids);
-  /*
-   Helper overload (most common usage): extracts from the thresholded image with no padding and no paragraph ids.
-*/
-  Boxa *GetTextlines(Pixa **pixa, int **blockids) {
-    return GetTextlines(false, 0, pixa, blockids, nullptr);
-  }
-
-  /**
-   * Get textlines and strips of image regions as a leptonica-style Boxa, Pixa
-   * pair, in reading order. Enables downstream handling of non-rectangular
-   * regions.
-   * Can be called before or after Recognize.
-   * If blockids is not nullptr, the block-id of each line is also returned as
-   * an array of one element per line. delete [] after use.
-   */
-  Boxa *GetStrips(Pixa **pixa, int **blockids);
-
-  /**
-   * Get the words as a leptonica-style
-   * Boxa, Pixa pair, in reading order.
-   * Can be called before or after Recognize.
-   */
-  Boxa *GetWords(Pixa **pixa);
-
-  /**
-   * Gets the individual connected (text) components (created
-   * after pages segmentation step, but before recognition)
-   * as a leptonica-style Boxa, Pixa pair, in reading order.
-   * Can be called before or after Recognize.
-   * Note: the caller is responsible for calling boxaDestroy()
-   * on the returned Boxa array and pixaDestroy() on cc array.
-   */
-  Boxa *GetConnectedComponents(Pixa **cc);
-
-  /**
-   * Get the given level kind of components (block, textline, word etc.) as a
-   * leptonica-style Boxa, Pixa pair, in reading order.
-   * Can be called before or after Recognize.
-   * If blockids is not nullptr, the block-id of each component is also returned
-   * as an array of one element per component. delete [] after use.
-   * If paraids is not nullptr, the paragraph-id of each component within its
-   * block is also returned as an array of one element per component. delete []
-   * after use. If raw_image is true, then portions of the original image are
-   * extracted instead of the thresholded image and padded with raw_padding. If
-   * text_only is true, then only text components are returned.
-   */
-  Boxa *GetComponentImages(PageIteratorLevel level, bool text_only,
-                           bool raw_image, int raw_padding, Pixa **pixa,
-                           int **blockids, int **paraids);
-  // Helper overload (most common usage): thresholded images, no padding, no paragraph ids.
-  Boxa *GetComponentImages(const PageIteratorLevel level, const bool text_only,
-                           Pixa **pixa, int **blockids) {
-    return GetComponentImages(level, text_only, false, 0, pixa, blockids,
-                              nullptr);
-  }
-
-  /**
-   * Returns the scale factor of the thresholded image that would be returned by
-   * GetThresholdedImage() and the various GetX() methods that call
-   * GetComponentImages().
-   * Returns 0 if no thresholder has been set.
-   */
-  int GetThresholdedImageScaleFactor() const;
-
-  /**
-   * Runs page layout analysis in the mode set by SetPageSegMode.
-   * May optionally be called prior to Recognize to get access to just
-   * the page layout results. Returns an iterator to the results.
-   * If merge_similar_words is true, words are combined where suitable for use
-   * with a line recognizer. Use if you want to use AnalyseLayout to find the
-   * textlines, and then want to process textline fragments with an external
-   * line recognizer.
-   * Returns nullptr on error or an empty page.
-   * The returned iterator must be deleted after use.
-   * WARNING! This class points to data held within the TessBaseAPI class, and
-   * therefore can only be used while the TessBaseAPI class still exists and
-   * has not been subjected to a call of Init, SetImage, Recognize, Clear, End,
-   * DetectOS, or anything else that changes the internal PAGE_RES.
-   */
-  PageIterator *AnalyseLayout();
-  PageIterator *AnalyseLayout(bool merge_similar_words);
-
-  /**
-   * Recognize the image from SetAndThresholdImage, generating Tesseract
-   * internal structures. Returns 0 on success.
-   * Optional. The Get*Text functions below will call Recognize if needed.
-   * After Recognize, the output is kept internally until the next SetImage.
-   */
-  int Recognize(ETEXT_DESC *monitor);
-
-  /**
-   * Methods to retrieve information after SetAndThresholdImage(),
-   * Recognize() or TesseractRect(). (Recognize is called implicitly if needed.)
-   */
-
-  /**
-   * Turns images into symbolic text.
-   *
-   * filename can point to a single image, a multi-page TIFF,
-   * or a plain text list of image filenames.
-   *
-   * retry_config is useful for debugging. If not nullptr, you can fall
-   * back to an alternate configuration if a page fails for some
-   * reason.
-   *
-   * timeout_millisec terminates processing if any single page
-   * takes too long. Set to 0 for unlimited time.
-   *
-   * renderer is responsible for creating the output. For example,
-   * use the TessTextRenderer if you want plaintext output, or
-   * the TessPDFRenderer to produce searchable PDF.
-   *
-   * If tessedit_page_number is non-negative, will only process that
-   * single page. Works for multi-page tiff file, or filelist.
-   *
-   * Returns true if successful, false on error.
-   */
-  bool ProcessPages(const char *filename, const char *retry_config,
-                    int timeout_millisec, TessResultRenderer *renderer);
-  // Does the real work of ProcessPages.
-  bool ProcessPagesInternal(const char *filename, const char *retry_config,
-                            int timeout_millisec, TessResultRenderer *renderer);
-
-  /**
-   * Turn a single image into symbolic text.
-   *
-   * The pix is the image processed. filename and page_index are
-   * metadata used by side-effect processes, such as reading a box
-   * file or formatting as hOCR.
-   *
-   * See ProcessPages for descriptions of other parameters.
-   */
-  bool ProcessPage(Pix *pix, int page_index, const char *filename,
-                   const char *retry_config, int timeout_millisec,
-                   TessResultRenderer *renderer);
-
-  /**
-   * Get a reading-order iterator to the results of LayoutAnalysis and/or
-   * Recognize. The returned iterator must be deleted after use.
-   * WARNING! This class points to data held within the TessBaseAPI class, and
-   * therefore can only be used while the TessBaseAPI class still exists and
-   * has not been subjected to a call of Init, SetImage, Recognize, Clear, End,
-   * DetectOS, or anything else that changes the internal PAGE_RES.
-   */
-  ResultIterator *GetIterator();
-
-  /**
-   * Get a mutable iterator to the results of LayoutAnalysis and/or Recognize.
-   * The returned iterator must be deleted after use.
-   * WARNING! This class points to data held within the TessBaseAPI class, and
-   * therefore can only be used while the TessBaseAPI class still exists and
-   * has not been subjected to a call of Init, SetImage, Recognize, Clear, End,
-   * DetectOS, or anything else that changes the internal PAGE_RES.
-   */
-  MutableIterator *GetMutableIterator();
-
-  /**
-   * The recognized text is returned as a char* which is coded
-   * as UTF8 and must be freed with the delete [] operator.
-   */
-  char *GetUTF8Text();
-
-  /**
-   * Make a HTML-formatted string with hOCR markup from the internal
-   * data structures.
-   * page_number is 0-based but will appear in the output as 1-based.
-   * monitor can be used to
-   *  cancel the recognition
-   *  receive progress callbacks
-   * Returned string must be freed with the delete [] operator.
-   */
-  char *GetHOCRText(ETEXT_DESC *monitor, int page_number);
-
-  /**
-   * Make a HTML-formatted string with hOCR markup from the internal
-   * data structures.
-   * page_number is 0-based but will appear in the output as 1-based.
-   * Returned string must be freed with the delete [] operator.
-   */
-  char *GetHOCRText(int page_number);
-
-  /**
-   * Make an XML-formatted string with Alto markup from the internal
-   * data structures.
-   */
-  char *GetAltoText(ETEXT_DESC *monitor, int page_number);
-
-  /**
-   * Make an XML-formatted string with Alto markup from the internal
-   * data structures.
-   */
-  char *GetAltoText(int page_number);
-
-  /**
-   * Make a TSV-formatted string from the internal data structures.
-   * page_number is 0-based but will appear in the output as 1-based.
-   * Returned string must be freed with the delete [] operator.
-   */
-  char *GetTSVText(int page_number);
-
-  /**
-   * Make a box file for LSTM training from the internal data structures.
-   * Constructs coordinates in the original image - not just the rectangle.
-   * page_number is a 0-based page index that will appear in the box file.
-   * Returned string must be freed with the delete [] operator.
-   */
-  char *GetLSTMBoxText(int page_number);
-
-  /**
-   * The recognized text is returned as a char* which is coded in the same
-   * format as a box file used in training.
-   * Constructs coordinates in the original image - not just the rectangle.
-   * page_number is a 0-based page index that will appear in the box file.
-   * Returned string must be freed with the delete [] operator.
-   */
-  char *GetBoxText(int page_number);
-
-  /**
-   * The recognized text is returned as a char* which is coded in the same
-   * format as a WordStr box file used in training.
-   * page_number is a 0-based page index that will appear in the box file.
-   * Returned string must be freed with the delete [] operator.
-   */
-  char *GetWordStrBoxText(int page_number);
-
-  /**
-   * The recognized text is returned as a char* which is coded
-   * as UNLV format Latin-1 with specific reject and suspect codes.
-   * Returned string must be freed with the delete [] operator.
-   */
-  char *GetUNLVText();
-
-  /**
-   * Detect the orientation of the input image and apparent script (alphabet).
-   * orient_deg is the detected clockwise rotation of the input image in degrees
-   * (0, 90, 180, 270)
-   * orient_conf is the confidence (15.0 is reasonably confident)
-   * script_name is an ASCII string, the name of the script, e.g. "Latin"
-   * script_conf is confidence level in the script
-   * Returns true on success and writes values to each parameter as an output
-   */
-  bool DetectOrientationScript(int *orient_deg, float *orient_conf,
-                               const char **script_name, float *script_conf);
-
-  /**
-   * The recognized text is returned as a char* which is coded
-   * as UTF8 and must be freed with the delete [] operator.
-   * page_number is a 0-based page index that will appear in the osd file.
-   */
-  char *GetOsdText(int page_number);
-
-  /** Returns the (average) confidence value between 0 and 100. */
-  int MeanTextConf();
-  /**
-   * Returns all word confidences (between 0 and 100) in an array, terminated
-   * by -1.  The calling function must delete [] after use.
-   * The number of confidences should correspond to the number of space-
-   * delimited words in GetUTF8Text.
-   */
-  int *AllWordConfidences();
-
-#ifndef DISABLED_LEGACY_ENGINE
-  /**
-   * Applies the given word to the adaptive classifier if possible.
-   * The word must be SPACE-DELIMITED UTF-8 - l i k e t h i s , so it can
-   * tell the boundaries of the graphemes.
-   * Assumes that SetImage/SetRectangle have been used to set the image
-   * to the given word. The mode arg should be PSM_SINGLE_WORD or
-   * PSM_CIRCLE_WORD, as that will be used to control layout analysis.
-   * The currently set PageSegMode is preserved.
-   * Returns false if adaption was not possible for some reason.
-   */
-  bool AdaptToWordStr(PageSegMode mode, const char *wordstr);
-#endif //  ndef DISABLED_LEGACY_ENGINE
-
-  /**
-   * Free up recognition results and any stored image data, without actually
-   * freeing any recognition data that would be time-consuming to reload.
-   * Afterwards, you must call SetImage or TesseractRect before doing
-   * any Recognize or Get* operation.
-   */
-  void Clear();
-
-  /**
-   * Close down tesseract and free up all memory. End() is equivalent to
-   * destructing and reconstructing your TessBaseAPI.
-   * Once End() has been used, none of the other API functions may be used
-   * other than Init and anything declared above it in the class definition.
-   */
-  void End();
-
-  /**
-   * Clear any library-level memory caches.
-   * There are a variety of expensive-to-load constant data structures (mostly
-   * language dictionaries) that are cached globally -- surviving the Init()
-   * and End() of individual TessBaseAPI's.  This function allows the clearing
-   * of these caches.
-   **/
-  static void ClearPersistentCache();
-
-  /**
-   * Check whether a word is valid according to Tesseract's language model
-   * @return 0 if the word is invalid, non-zero if valid.
-   * @warning temporary! This function will be removed from here and placed
-   * in a separate API at some future time.
-   */
-  int IsValidWord(const char *word) const;
-  // Returns true if utf8_character is defined in the UniCharset.
-  bool IsValidCharacter(const char *utf8_character) const;
-
-  bool GetTextDirection(int *out_offset, float *out_slope);
-
-  /** Sets Dict::letter_is_okay_ function to point to the given function. */
-  void SetDictFunc(DictFunc f);
-
-  /** Sets Dict::probability_in_context_ function to point to the given
-   * function.
-   */
-  void SetProbabilityInContextFunc(ProbabilityInContextFunc f);
-
-  /**
-   * Estimates the Orientation And Script of the image.
-   * @return true if the image was processed successfully.
-   */
-  bool DetectOS(OSResults *);
-
-  /**
-   * Return text orientation of each block as determined by an earlier run
-   * of layout analysis.
-   */
-  void GetBlockTextOrientations(int **block_orientation,
-                                bool **vertical_writing);
-
-  /** This method returns the string form of the specified unichar. */
-  const char *GetUnichar(int unichar_id) const;
-
-  /** Return the pointer to the i-th dawg loaded into tesseract_ object. */
-  const Dawg *GetDawg(int i) const;
-
-  /** Return the number of dawgs loaded into tesseract_ object. */
-  int NumDawgs() const;
-
-  Tesseract *tesseract() const {
-    return tesseract_;
-  }
-
-  OcrEngineMode oem() const {
-    return last_oem_requested_;
-  }
-
-  void set_min_orientation_margin(double margin);
-  /* @} */
-
-protected:
-  /** Common code for setting the image. Returns true if Init has been called.
-   */
-  bool InternalSetImage();
-
-  /**
-   * Run the thresholder to make the thresholded image. If pix is not nullptr,
-   * the source is thresholded to pix instead of the internal IMAGE.
-   */
-  virtual bool Threshold(Pix **pix);
-
-  /**
-   * Find lines from the image making the BLOCK_LIST.
-   * @return 0 on success.
-   */
-  int FindLines();
-
-  /** Delete the pageres and block list ready for a new page. */
-  void ClearResults();
-
-  /**
-   * Return an LTR Result Iterator -- used only for training, as we really want
-   * to ignore all BiDi smarts at that point.
-   * delete once you're done with it.
-   */
-  LTRResultIterator *GetLTRIterator();
-
-  /**
-   * Return the length of the output text string, as UTF8, assuming
-   * one newline per line and one per block, with a terminator,
-   * and assuming a single character reject marker for each rejected character.
-   * Also return the number of recognized blobs in blob_count.
-   */
-  int TextLength(int *blob_count) const;
-
-  //// paragraphs.cpp ////////////////////////////////////////////////////
-  void DetectParagraphs(bool after_text_recognition);
-
-  const PAGE_RES *GetPageRes() const {
-    return page_res_;
-  }
-
-protected:
-  Tesseract *tesseract_;          ///< The underlying data object.
-  Tesseract *osd_tesseract_;      ///< For orientation & script detection.
-  EquationDetect *equ_detect_;    ///< The equation detector.
-  FileReader reader_;             ///< Reads files from any filesystem.
-  ImageThresholder *thresholder_; ///< Image thresholding module.
-  std::vector<ParagraphModel *> *paragraph_models_;
-  BLOCK_LIST *block_list_;           ///< The page layout.
-  PAGE_RES *page_res_;               ///< The page-level data.
-  std::string input_file_;           ///< Name used by training code.
-  std::string output_file_;          ///< Name used by debug code.
-  std::string datapath_;             ///< Current location of tessdata.
-  std::string language_;             ///< Last initialized language.
-  OcrEngineMode last_oem_requested_; ///< Last ocr language mode requested.
-  bool recognition_done_;            ///< page_res_ contains recognition data.
-
-  /**
-   * @defgroup ThresholderParams Thresholder Parameters
-   * Parameters saved from the Thresholder. Needed to rebuild coordinates.
-   */
-  /* @{ */
-  int rect_left_;
-  int rect_top_;
-  int rect_width_;
-  int rect_height_;
-  int image_width_;
-  int image_height_;
-  /* @} */
-
-private:
-  // A list of image filenames gets special consideration
-  bool ProcessPagesFileList(FILE *fp, std::string *buf,
-                            const char *retry_config, int timeout_millisec,
-                            TessResultRenderer *renderer,
-                            int tessedit_page_number);
-  // TIFF supports multipage so gets special consideration.
-  bool ProcessPagesMultipageTiff(const unsigned char *data, size_t size,
-                                 const char *filename, const char *retry_config,
-                                 int timeout_millisec,
-                                 TessResultRenderer *renderer,
-                                 int tessedit_page_number);
-}; // class TessBaseAPI.
-
-/** Escape a char string - remove &<>"' with HTML codes. */
-std::string HOcrEscape(const char *text);
-
-} // namespace tesseract
-
-#endif // TESSERACT_API_BASEAPI_H_
--- a/third_party/ocr/tesseract-ocr/uos/mips64/include/tesseract/capi.h
+++ b/third_party/ocr/tesseract-ocr/uos/mips64/include/tesseract/capi.h
@ -1,484 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// File:        capi.h
-// Description: C-API TessBaseAPI
-//
-// (C) Copyright 2012, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef API_CAPI_H_
-#define API_CAPI_H_
-
-#include "export.h"
-
-#ifdef __cplusplus
-#  include <tesseract/baseapi.h>
-#  include <tesseract/ocrclass.h>
-#  include <tesseract/pageiterator.h>
-#  include <tesseract/renderer.h>
-#  include <tesseract/resultiterator.h>
-#endif
-
-#include <stdbool.h>
-#include <stdio.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#ifndef BOOL
-#  define BOOL int
-#  define TRUE 1
-#  define FALSE 0
-#endif
-
-#ifdef __cplusplus
-typedef tesseract::TessResultRenderer TessResultRenderer;
-typedef tesseract::TessBaseAPI TessBaseAPI;
-typedef tesseract::PageIterator TessPageIterator;
-typedef tesseract::ResultIterator TessResultIterator;
-typedef tesseract::MutableIterator TessMutableIterator;
-typedef tesseract::ChoiceIterator TessChoiceIterator;
-typedef tesseract::OcrEngineMode TessOcrEngineMode;
-typedef tesseract::PageSegMode TessPageSegMode;
-typedef tesseract::PageIteratorLevel TessPageIteratorLevel;
-typedef tesseract::Orientation TessOrientation;
-typedef tesseract::ParagraphJustification TessParagraphJustification;
-typedef tesseract::WritingDirection TessWritingDirection;
-typedef tesseract::TextlineOrder TessTextlineOrder;
-typedef tesseract::PolyBlockType TessPolyBlockType;
-typedef tesseract::ETEXT_DESC ETEXT_DESC;
-#else
-typedef struct TessResultRenderer TessResultRenderer;
-typedef struct TessBaseAPI TessBaseAPI;
-typedef struct TessPageIterator TessPageIterator;
-typedef struct TessResultIterator TessResultIterator;
-typedef struct TessMutableIterator TessMutableIterator;
-typedef struct TessChoiceIterator TessChoiceIterator;
-typedef enum TessOcrEngineMode {
-  OEM_TESSERACT_ONLY,
-  OEM_LSTM_ONLY,
-  OEM_TESSERACT_LSTM_COMBINED,
-  OEM_DEFAULT
-} TessOcrEngineMode;
-typedef enum TessPageSegMode {
-  PSM_OSD_ONLY,
-  PSM_AUTO_OSD,
-  PSM_AUTO_ONLY,
-  PSM_AUTO,
-  PSM_SINGLE_COLUMN,
-  PSM_SINGLE_BLOCK_VERT_TEXT,
-  PSM_SINGLE_BLOCK,
-  PSM_SINGLE_LINE,
-  PSM_SINGLE_WORD,
-  PSM_CIRCLE_WORD,
-  PSM_SINGLE_CHAR,
-  PSM_SPARSE_TEXT,
-  PSM_SPARSE_TEXT_OSD,
-  PSM_RAW_LINE,
-  PSM_COUNT
-} TessPageSegMode;
-typedef enum TessPageIteratorLevel {
-  RIL_BLOCK,
-  RIL_PARA,
-  RIL_TEXTLINE,
-  RIL_WORD,
-  RIL_SYMBOL
-} TessPageIteratorLevel;
-typedef enum TessPolyBlockType {
-  PT_UNKNOWN,
-  PT_FLOWING_TEXT,
-  PT_HEADING_TEXT,
-  PT_PULLOUT_TEXT,
-  PT_EQUATION,
-  PT_INLINE_EQUATION,
-  PT_TABLE,
-  PT_VERTICAL_TEXT,
-  PT_CAPTION_TEXT,
-  PT_FLOWING_IMAGE,
-  PT_HEADING_IMAGE,
-  PT_PULLOUT_IMAGE,
-  PT_HORZ_LINE,
-  PT_VERT_LINE,
-  PT_NOISE,
-  PT_COUNT
-} TessPolyBlockType;
-typedef enum TessOrientation {
-  ORIENTATION_PAGE_UP,
-  ORIENTATION_PAGE_RIGHT,
-  ORIENTATION_PAGE_DOWN,
-  ORIENTATION_PAGE_LEFT
-} TessOrientation;
-typedef enum TessParagraphJustification {
-  JUSTIFICATION_UNKNOWN,
-  JUSTIFICATION_LEFT,
-  JUSTIFICATION_CENTER,
-  JUSTIFICATION_RIGHT
-} TessParagraphJustification;
-typedef enum TessWritingDirection {
-  WRITING_DIRECTION_LEFT_TO_RIGHT,
-  WRITING_DIRECTION_RIGHT_TO_LEFT,
-  WRITING_DIRECTION_TOP_TO_BOTTOM
-} TessWritingDirection;
-typedef enum TessTextlineOrder {
-  TEXTLINE_ORDER_LEFT_TO_RIGHT,
-  TEXTLINE_ORDER_RIGHT_TO_LEFT,
-  TEXTLINE_ORDER_TOP_TO_BOTTOM
-} TessTextlineOrder;
-typedef struct ETEXT_DESC ETEXT_DESC;
-#endif
-
-typedef bool (*TessCancelFunc)(void *cancel_this, int words);
-typedef bool (*TessProgressFunc)(ETEXT_DESC *ths, int left, int right, int top,
-                                 int bottom);
-
-struct Pix;
-struct Boxa;
-struct Pixa;
-
-/* General free functions */
-
-TESS_API const char *TessVersion();
-TESS_API void TessDeleteText(const char *text);
-TESS_API void TessDeleteTextArray(char **arr);
-TESS_API void TessDeleteIntArray(const int *arr);
-
-/* Renderer API */
-TESS_API TessResultRenderer *TessTextRendererCreate(const char *outputbase);
-TESS_API TessResultRenderer *TessHOcrRendererCreate(const char *outputbase);
-TESS_API TessResultRenderer *TessHOcrRendererCreate2(const char *outputbase,
-                                                     BOOL font_info);
-TESS_API TessResultRenderer *TessAltoRendererCreate(const char *outputbase);
-TESS_API TessResultRenderer *TessTsvRendererCreate(const char *outputbase);
-TESS_API TessResultRenderer *TessPDFRendererCreate(const char *outputbase,
-                                                   const char *datadir,
-                                                   BOOL textonly);
-TESS_API TessResultRenderer *TessUnlvRendererCreate(const char *outputbase);
-TESS_API TessResultRenderer *TessBoxTextRendererCreate(const char *outputbase);
-TESS_API TessResultRenderer *TessLSTMBoxRendererCreate(const char *outputbase);
-TESS_API TessResultRenderer *TessWordStrBoxRendererCreate(
-    const char *outputbase);
-
-TESS_API void TessDeleteResultRenderer(TessResultRenderer *renderer);
-TESS_API void TessResultRendererInsert(TessResultRenderer *renderer,
-                                       TessResultRenderer *next);
-TESS_API TessResultRenderer *TessResultRendererNext(
-    TessResultRenderer *renderer);
-TESS_API BOOL TessResultRendererBeginDocument(TessResultRenderer *renderer,
-                                              const char *title);
-TESS_API BOOL TessResultRendererAddImage(TessResultRenderer *renderer,
-                                         TessBaseAPI *api);
-TESS_API BOOL TessResultRendererEndDocument(TessResultRenderer *renderer);
-
-TESS_API const char *TessResultRendererExtention(TessResultRenderer *renderer);
-TESS_API const char *TessResultRendererTitle(TessResultRenderer *renderer);
-TESS_API int TessResultRendererImageNum(TessResultRenderer *renderer);
-
-/* Base API */
-
-TESS_API TessBaseAPI *TessBaseAPICreate();
-TESS_API void TessBaseAPIDelete(TessBaseAPI *handle);
-
-TESS_API size_t TessBaseAPIGetOpenCLDevice(TessBaseAPI *handle, void **device);
-
-TESS_API void TessBaseAPISetInputName(TessBaseAPI *handle, const char *name);
-TESS_API const char *TessBaseAPIGetInputName(TessBaseAPI *handle);
-
-TESS_API void TessBaseAPISetInputImage(TessBaseAPI *handle, struct Pix *pix);
-TESS_API struct Pix *TessBaseAPIGetInputImage(TessBaseAPI *handle);
-
-TESS_API int TessBaseAPIGetSourceYResolution(TessBaseAPI *handle);
-TESS_API const char *TessBaseAPIGetDatapath(TessBaseAPI *handle);
-
-TESS_API void TessBaseAPISetOutputName(TessBaseAPI *handle, const char *name);
-
-TESS_API BOOL TessBaseAPISetVariable(TessBaseAPI *handle, const char *name,
-                                     const char *value);
-TESS_API BOOL TessBaseAPISetDebugVariable(TessBaseAPI *handle, const char *name,
-                                          const char *value);
-
-TESS_API BOOL TessBaseAPIGetIntVariable(const TessBaseAPI *handle,
-                                        const char *name, int *value);
-TESS_API BOOL TessBaseAPIGetBoolVariable(const TessBaseAPI *handle,
-                                         const char *name, BOOL *value);
-TESS_API BOOL TessBaseAPIGetDoubleVariable(const TessBaseAPI *handle,
-                                           const char *name, double *value);
-TESS_API const char *TessBaseAPIGetStringVariable(const TessBaseAPI *handle,
-                                                  const char *name);
-
-TESS_API void TessBaseAPIPrintVariables(const TessBaseAPI *handle, FILE *fp);
-TESS_API BOOL TessBaseAPIPrintVariablesToFile(const TessBaseAPI *handle,
-                                              const char *filename);
-
-TESS_API int TessBaseAPIInit1(TessBaseAPI *handle, const char *datapath,
-                              const char *language, TessOcrEngineMode oem,
-                              char **configs, int configs_size);
-TESS_API int TessBaseAPIInit2(TessBaseAPI *handle, const char *datapath,
-                              const char *language, TessOcrEngineMode oem);
-TESS_API int TessBaseAPIInit3(TessBaseAPI *handle, const char *datapath,
-                              const char *language);
-
-TESS_API int TessBaseAPIInit4(TessBaseAPI *handle, const char *datapath,
-                              const char *language, TessOcrEngineMode mode,
-                              char **configs, int configs_size, char **vars_vec,
-                              char **vars_values, size_t vars_vec_size,
-                              BOOL set_only_non_debug_params);
-
-TESS_API int TessBaseAPIInit5(TessBaseAPI *handle, const char *data, int data_size,
-                              const char *language, TessOcrEngineMode mode,
-                              char **configs, int configs_size, char **vars_vec,
-                              char **vars_values, size_t vars_vec_size,
-                              BOOL set_only_non_debug_params);
-
-TESS_API const char *TessBaseAPIGetInitLanguagesAsString(
-    const TessBaseAPI *handle);
-TESS_API char **TessBaseAPIGetLoadedLanguagesAsVector(
-    const TessBaseAPI *handle);
-TESS_API char **TessBaseAPIGetAvailableLanguagesAsVector(
-    const TessBaseAPI *handle);
-
-TESS_API void TessBaseAPIInitForAnalysePage(TessBaseAPI *handle);
-
-TESS_API void TessBaseAPIReadConfigFile(TessBaseAPI *handle,
-                                        const char *filename);
-TESS_API void TessBaseAPIReadDebugConfigFile(TessBaseAPI *handle,
-                                             const char *filename);
-
-TESS_API void TessBaseAPISetPageSegMode(TessBaseAPI *handle,
-                                        TessPageSegMode mode);
-TESS_API TessPageSegMode TessBaseAPIGetPageSegMode(const TessBaseAPI *handle);
-
-TESS_API char *TessBaseAPIRect(TessBaseAPI *handle,
-                               const unsigned char *imagedata,
-                               int bytes_per_pixel, int bytes_per_line,
-                               int left, int top, int width, int height);
-
-TESS_API void TessBaseAPIClearAdaptiveClassifier(TessBaseAPI *handle);
-
-TESS_API void TessBaseAPISetImage(TessBaseAPI *handle,
-                                  const unsigned char *imagedata, int width,
-                                  int height, int bytes_per_pixel,
-                                  int bytes_per_line);
-TESS_API void TessBaseAPISetImage2(TessBaseAPI *handle, struct Pix *pix);
-
-TESS_API void TessBaseAPISetSourceResolution(TessBaseAPI *handle, int ppi);
-
-TESS_API void TessBaseAPISetRectangle(TessBaseAPI *handle, int left, int top,
-                                      int width, int height);
-
-TESS_API struct Pix *TessBaseAPIGetThresholdedImage(TessBaseAPI *handle);
-TESS_API struct Boxa *TessBaseAPIGetRegions(TessBaseAPI *handle,
-                                            struct Pixa **pixa);
-TESS_API struct Boxa *TessBaseAPIGetTextlines(TessBaseAPI *handle,
-                                              struct Pixa **pixa,
-                                              int **blockids);
-TESS_API struct Boxa *TessBaseAPIGetTextlines1(TessBaseAPI *handle,
-                                               BOOL raw_image, int raw_padding,
-                                               struct Pixa **pixa,
-                                               int **blockids, int **paraids);
-TESS_API struct Boxa *TessBaseAPIGetStrips(TessBaseAPI *handle,
-                                           struct Pixa **pixa, int **blockids);
-TESS_API struct Boxa *TessBaseAPIGetWords(TessBaseAPI *handle,
-                                          struct Pixa **pixa);
-TESS_API struct Boxa *TessBaseAPIGetConnectedComponents(TessBaseAPI *handle,
-                                                        struct Pixa **cc);
-TESS_API struct Boxa *TessBaseAPIGetComponentImages(TessBaseAPI *handle,
-                                                    TessPageIteratorLevel level,
-                                                    BOOL text_only,
-                                                    struct Pixa **pixa,
-                                                    int **blockids);
-TESS_API struct Boxa *TessBaseAPIGetComponentImages1(
-    TessBaseAPI *handle, TessPageIteratorLevel level, BOOL text_only,
-    BOOL raw_image, int raw_padding, struct Pixa **pixa, int **blockids,
-    int **paraids);
-
-TESS_API int TessBaseAPIGetThresholdedImageScaleFactor(
-    const TessBaseAPI *handle);
-
-TESS_API TessPageIterator *TessBaseAPIAnalyseLayout(TessBaseAPI *handle);
-
-TESS_API int TessBaseAPIRecognize(TessBaseAPI *handle, ETEXT_DESC *monitor);
-
-TESS_API BOOL TessBaseAPIProcessPages(TessBaseAPI *handle, const char *filename,
-                                      const char *retry_config,
-                                      int timeout_millisec,
-                                      TessResultRenderer *renderer);
-TESS_API BOOL TessBaseAPIProcessPage(TessBaseAPI *handle, struct Pix *pix,
-                                     int page_index, const char *filename,
-                                     const char *retry_config,
-                                     int timeout_millisec,
-                                     TessResultRenderer *renderer);
-
-TESS_API TessResultIterator *TessBaseAPIGetIterator(TessBaseAPI *handle);
-TESS_API TessMutableIterator *TessBaseAPIGetMutableIterator(
-    TessBaseAPI *handle);
-
-TESS_API char *TessBaseAPIGetUTF8Text(TessBaseAPI *handle);
-TESS_API char *TessBaseAPIGetHOCRText(TessBaseAPI *handle, int page_number);
-
-TESS_API char *TessBaseAPIGetAltoText(TessBaseAPI *handle, int page_number);
-TESS_API char *TessBaseAPIGetTsvText(TessBaseAPI *handle, int page_number);
-
-TESS_API char *TessBaseAPIGetBoxText(TessBaseAPI *handle, int page_number);
-TESS_API char *TessBaseAPIGetLSTMBoxText(TessBaseAPI *handle, int page_number);
-TESS_API char *TessBaseAPIGetWordStrBoxText(TessBaseAPI *handle,
-                                            int page_number);
-
-TESS_API char *TessBaseAPIGetUNLVText(TessBaseAPI *handle);
-TESS_API int TessBaseAPIMeanTextConf(TessBaseAPI *handle);
-
-TESS_API int *TessBaseAPIAllWordConfidences(TessBaseAPI *handle);
-
-#ifndef DISABLED_LEGACY_ENGINE
-TESS_API BOOL TessBaseAPIAdaptToWordStr(TessBaseAPI *handle,
-                                        TessPageSegMode mode,
-                                        const char *wordstr);
-#endif // #ifndef DISABLED_LEGACY_ENGINE
-
-TESS_API void TessBaseAPIClear(TessBaseAPI *handle);
-TESS_API void TessBaseAPIEnd(TessBaseAPI *handle);
-
-TESS_API int TessBaseAPIIsValidWord(TessBaseAPI *handle, const char *word);
-TESS_API BOOL TessBaseAPIGetTextDirection(TessBaseAPI *handle, int *out_offset,
-                                          float *out_slope);
-
-TESS_API const char *TessBaseAPIGetUnichar(TessBaseAPI *handle, int unichar_id);
-
-TESS_API void TessBaseAPIClearPersistentCache(TessBaseAPI *handle);
-
-#ifndef DISABLED_LEGACY_ENGINE
-
-// Call TessDeleteText(*best_script_name) to free memory allocated by this
-// function
-TESS_API BOOL TessBaseAPIDetectOrientationScript(TessBaseAPI *handle,
-                                                 int *orient_deg,
-                                                 float *orient_conf,
-                                                 const char **script_name,
-                                                 float *script_conf);
-#endif // #ifndef DISABLED_LEGACY_ENGINE
-
-TESS_API void TessBaseAPISetMinOrientationMargin(TessBaseAPI *handle,
-                                                 double margin);
-
-TESS_API int TessBaseAPINumDawgs(const TessBaseAPI *handle);
-
-TESS_API TessOcrEngineMode TessBaseAPIOem(const TessBaseAPI *handle);
-
-TESS_API void TessBaseGetBlockTextOrientations(TessBaseAPI *handle,
-                                               int **block_orientation,
-                                               bool **vertical_writing);
-
-/* Page iterator */
-
-TESS_API void TessPageIteratorDelete(TessPageIterator *handle);
-
-TESS_API TessPageIterator *TessPageIteratorCopy(const TessPageIterator *handle);
-
-TESS_API void TessPageIteratorBegin(TessPageIterator *handle);
-
-TESS_API BOOL TessPageIteratorNext(TessPageIterator *handle,
-                                   TessPageIteratorLevel level);
-
-TESS_API BOOL TessPageIteratorIsAtBeginningOf(const TessPageIterator *handle,
-                                              TessPageIteratorLevel level);
-
-TESS_API BOOL TessPageIteratorIsAtFinalElement(const TessPageIterator *handle,
-                                               TessPageIteratorLevel level,
-                                               TessPageIteratorLevel element);
-
-TESS_API BOOL TessPageIteratorBoundingBox(const TessPageIterator *handle,
-                                          TessPageIteratorLevel level,
-                                          int *left, int *top, int *right,
-                                          int *bottom);
-
-TESS_API TessPolyBlockType
-TessPageIteratorBlockType(const TessPageIterator *handle);
-
-TESS_API struct Pix *TessPageIteratorGetBinaryImage(
-    const TessPageIterator *handle, TessPageIteratorLevel level);
-
-TESS_API struct Pix *TessPageIteratorGetImage(const TessPageIterator *handle,
-                                              TessPageIteratorLevel level,
-                                              int padding,
-                                              struct Pix *original_image,
-                                              int *left, int *top);
-
-TESS_API BOOL TessPageIteratorBaseline(const TessPageIterator *handle,
-                                       TessPageIteratorLevel level, int *x1,
-                                       int *y1, int *x2, int *y2);
-
-TESS_API void TessPageIteratorOrientation(
-    TessPageIterator *handle, TessOrientation *orientation,
-    TessWritingDirection *writing_direction, TessTextlineOrder *textline_order,
-    float *deskew_angle);
-
-TESS_API void TessPageIteratorParagraphInfo(
-    TessPageIterator *handle, TessParagraphJustification *justification,
-    BOOL *is_list_item, BOOL *is_crown, int *first_line_indent);
-
-/* Result iterator */
-
-TESS_API void TessResultIteratorDelete(TessResultIterator *handle);
-TESS_API TessResultIterator *TessResultIteratorCopy(
-    const TessResultIterator *handle);
-TESS_API TessPageIterator *TessResultIteratorGetPageIterator(
-    TessResultIterator *handle);
-TESS_API const TessPageIterator *TessResultIteratorGetPageIteratorConst(
-    const TessResultIterator *handle);
-TESS_API TessChoiceIterator *TessResultIteratorGetChoiceIterator(
-    const TessResultIterator *handle);
-
-TESS_API BOOL TessResultIteratorNext(TessResultIterator *handle,
-                                     TessPageIteratorLevel level);
-TESS_API char *TessResultIteratorGetUTF8Text(const TessResultIterator *handle,
-                                             TessPageIteratorLevel level);
-TESS_API float TessResultIteratorConfidence(const TessResultIterator *handle,
-                                            TessPageIteratorLevel level);
-TESS_API const char *TessResultIteratorWordRecognitionLanguage(
-    const TessResultIterator *handle);
-TESS_API const char *TessResultIteratorWordFontAttributes(
-    const TessResultIterator *handle, BOOL *is_bold, BOOL *is_italic,
-    BOOL *is_underlined, BOOL *is_monospace, BOOL *is_serif, BOOL *is_smallcaps,
-    int *pointsize, int *font_id);
-
-TESS_API BOOL
-TessResultIteratorWordIsFromDictionary(const TessResultIterator *handle);
-TESS_API BOOL TessResultIteratorWordIsNumeric(const TessResultIterator *handle);
-TESS_API BOOL
-TessResultIteratorSymbolIsSuperscript(const TessResultIterator *handle);
-TESS_API BOOL
-TessResultIteratorSymbolIsSubscript(const TessResultIterator *handle);
-TESS_API BOOL
-TessResultIteratorSymbolIsDropcap(const TessResultIterator *handle);
-
-TESS_API void TessChoiceIteratorDelete(TessChoiceIterator *handle);
-TESS_API BOOL TessChoiceIteratorNext(TessChoiceIterator *handle);
-TESS_API const char *TessChoiceIteratorGetUTF8Text(
-    const TessChoiceIterator *handle);
-TESS_API float TessChoiceIteratorConfidence(const TessChoiceIterator *handle);
-
-/* Progress monitor */
-
-TESS_API ETEXT_DESC *TessMonitorCreate();
-TESS_API void TessMonitorDelete(ETEXT_DESC *monitor);
-TESS_API void TessMonitorSetCancelFunc(ETEXT_DESC *monitor,
-                                       TessCancelFunc cancelFunc);
-TESS_API void TessMonitorSetCancelThis(ETEXT_DESC *monitor, void *cancelThis);
-TESS_API void *TessMonitorGetCancelThis(ETEXT_DESC *monitor);
-TESS_API void TessMonitorSetProgressFunc(ETEXT_DESC *monitor,
-                                         TessProgressFunc progressFunc);
-TESS_API int TessMonitorGetProgress(ETEXT_DESC *monitor);
-TESS_API void TessMonitorSetDeadlineMSecs(ETEXT_DESC *monitor, int deadline);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif // API_CAPI_H_
--- a/third_party/ocr/tesseract-ocr/uos/mips64/include/tesseract/export.h
+++ b/third_party/ocr/tesseract-ocr/uos/mips64/include/tesseract/export.h
@ -1,37 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// File:        export.h
-// Description: Place holder
-//
-// (C) Copyright 2006, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef TESSERACT_PLATFORM_H_
-#define TESSERACT_PLATFORM_H_
-
-#ifndef TESS_API
-#  if defined(_WIN32) || defined(__CYGWIN__)
-#    if defined(TESS_EXPORTS)
-#      define TESS_API __declspec(dllexport)
-#    elif defined(TESS_IMPORTS)
-#      define TESS_API __declspec(dllimport)
-#    else
-#      define TESS_API
-#    endif
-#  else
-#    if defined(TESS_EXPORTS) || defined(TESS_IMPORTS)
-#      define TESS_API __attribute__((visibility("default")))
-#    else
-#      define TESS_API
-#    endif
-#  endif
-#endif
-
-#endif // TESSERACT_PLATFORM_H_
--- a/third_party/ocr/tesseract-ocr/uos/mips64/include/tesseract/ltrresultiterator.h
+++ b/third_party/ocr/tesseract-ocr/uos/mips64/include/tesseract/ltrresultiterator.h
@ -1,235 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// File:        ltrresultiterator.h
-// Description: Iterator for tesseract results in strict left-to-right
-//              order that avoids using tesseract internal data structures.
-// Author:      Ray Smith
-//
-// (C) Copyright 2010, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H_
-#define TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H_
-
-#include "export.h"       // for TESS_API
-#include "pageiterator.h" // for PageIterator
-#include "publictypes.h"  // for PageIteratorLevel
-#include "unichar.h"      // for StrongScriptDirection
-
-namespace tesseract {
-
-class BLOB_CHOICE_IT;
-class PAGE_RES;
-class WERD_RES;
-
-class Tesseract;
-
-// Class to iterate over tesseract results, providing access to all levels
-// of the page hierarchy, without including any tesseract headers or having
-// to handle any tesseract structures.
-// WARNING! This class points to data held within the TessBaseAPI class, and
-// therefore can only be used while the TessBaseAPI class still exists and
-// has not been subjected to a call of Init, SetImage, Recognize, Clear, End
-// DetectOS, or anything else that changes the internal PAGE_RES.
-// See tesseract/publictypes.h for the definition of PageIteratorLevel.
-// See also base class PageIterator, which contains the bulk of the interface.
-// LTRResultIterator adds text-specific methods for access to OCR output.
-
-class TESS_API LTRResultIterator : public PageIterator {
-  friend class ChoiceIterator;
-
-public:
-  // page_res and tesseract come directly from the BaseAPI.
-  // The rectangle parameters are copied indirectly from the Thresholder,
-  // via the BaseAPI. They represent the coordinates of some rectangle in an
-  // original image (in top-left-origin coordinates) and therefore the top-left
-  // needs to be added to any output boxes in order to specify coordinates
-  // in the original image. See TessBaseAPI::SetRectangle.
-  // The scale and scaled_yres are in case the Thresholder scaled the image
-  // rectangle prior to thresholding. Any coordinates in tesseract's image
-  // must be divided by scale before adding (rect_left, rect_top).
-  // The scaled_yres indicates the effective resolution of the binary image
-  // that tesseract has been given by the Thresholder.
-  // After the constructor, Begin has already been called.
-  LTRResultIterator(PAGE_RES *page_res, Tesseract *tesseract, int scale,
-                    int scaled_yres, int rect_left, int rect_top,
-                    int rect_width, int rect_height);
-
-  ~LTRResultIterator() override;
-
-  // LTRResultIterators may be copied! This makes it possible to iterate over
-  // all the objects at a lower level, while maintaining an iterator to
-  // objects at a higher level. These constructors DO NOT CALL Begin, so
-  // iterations will continue from the location of src.
-  // TODO: For now the copy constructor and operator= only need the base class
-  // versions, but if new data members are added, don't forget to add them!
-
-  // ============= Moving around within the page ============.
-
-  // See PageIterator.
-
-  // ============= Accessing data ==============.
-
-  // Returns the null terminated UTF-8 encoded text string for the current
-  // object at the given level. Use delete [] to free after use.
-  char *GetUTF8Text(PageIteratorLevel level) const;
-
-  // Set the string inserted at the end of each text line. "\n" by default.
-  void SetLineSeparator(const char *new_line);
-
-  // Set the string inserted at the end of each paragraph. "\n" by default.
-  void SetParagraphSeparator(const char *new_para);
-
-  // Returns the mean confidence of the current object at the given level.
-  // The number should be interpreted as a percent probability. (0.0f-100.0f)
-  float Confidence(PageIteratorLevel level) const;
-
-  // ============= Functions that refer to words only ============.
-
-  // Returns the font attributes of the current word. If iterating at a higher
-  // level object than words, eg textlines, then this will return the
-  // attributes of the first word in that textline.
-  // The actual return value is a string representing a font name. It points
-  // to an internal table and SHOULD NOT BE DELETED. Lifespan is the same as
-  // the iterator itself, ie rendered invalid by various members of
-  // TessBaseAPI, including Init, SetImage, End or deleting the TessBaseAPI.
-  // Pointsize is returned in printers points (1/72 inch.)
-  const char *WordFontAttributes(bool *is_bold, bool *is_italic,
-                                 bool *is_underlined, bool *is_monospace,
-                                 bool *is_serif, bool *is_smallcaps,
-                                 int *pointsize, int *font_id) const;
-
-  // Return the name of the language used to recognize this word.
-  // On error, nullptr.  Do not delete this pointer.
-  const char *WordRecognitionLanguage() const;
-
-  // Return the overall directionality of this word.
-  StrongScriptDirection WordDirection() const;
-
-  // Returns true if the current word was found in a dictionary.
-  bool WordIsFromDictionary() const;
-
-  // Returns the number of blanks before the current word.
-  int BlanksBeforeWord() const;
-
-  // Returns true if the current word is numeric.
-  bool WordIsNumeric() const;
-
-  // Returns true if the word contains blamer information.
-  bool HasBlamerInfo() const;
-
-  // Returns the pointer to ParamsTrainingBundle stored in the BlamerBundle
-  // of the current word.
-  const void *GetParamsTrainingBundle() const;
-
-  // Returns a pointer to the string with blamer information for this word.
-  // Assumes that the word's blamer_bundle is not nullptr.
-  const char *GetBlamerDebug() const;
-
-  // Returns a pointer to the string with misadaption information for this word.
-  // Assumes that the word's blamer_bundle is not nullptr.
-  const char *GetBlamerMisadaptionDebug() const;
-
-  // Returns true if a truth string was recorded for the current word.
-  bool HasTruthString() const;
-
-  // Returns true if the given string is equivalent to the truth string for
-  // the current word.
-  bool EquivalentToTruth(const char *str) const;
-
-  // Returns a null terminated UTF-8 encoded truth string for the current word.
-  // Use delete [] to free after use.
-  char *WordTruthUTF8Text() const;
-
-  // Returns a null terminated UTF-8 encoded normalized OCR string for the
-  // current word. Use delete [] to free after use.
-  char *WordNormedUTF8Text() const;
-
-  // Returns a pointer to serialized choice lattice.
-  // Fills lattice_size with the number of bytes in lattice data.
-  const char *WordLattice(int *lattice_size) const;
-
-  // ============= Functions that refer to symbols only ============.
-
-  // Returns true if the current symbol is a superscript.
-  // If iterating at a higher level object than symbols, eg words, then
-  // this will return the attributes of the first symbol in that word.
-  bool SymbolIsSuperscript() const;
-  // Returns true if the current symbol is a subscript.
-  // If iterating at a higher level object than symbols, eg words, then
-  // this will return the attributes of the first symbol in that word.
-  bool SymbolIsSubscript() const;
-  // Returns true if the current symbol is a dropcap.
-  // If iterating at a higher level object than symbols, eg words, then
-  // this will return the attributes of the first symbol in that word.
-  bool SymbolIsDropcap() const;
-
-protected:
-  const char *line_separator_;
-  const char *paragraph_separator_;
-};
-
-// Class to iterate over the classifier choices for a single RIL_SYMBOL.
-class TESS_API ChoiceIterator {
-public:
-  // Construction is from a LTRResultIterator that points to the symbol of
-  // interest. The ChoiceIterator allows a one-shot iteration over the
-  // choices for this symbol and after that it is useless.
-  explicit ChoiceIterator(const LTRResultIterator &result_it);
-  ~ChoiceIterator();
-
-  // Moves to the next choice for the symbol and returns false if there
-  // are none left.
-  bool Next();
-
-  // ============= Accessing data ==============.
-
-  // Returns the null terminated UTF-8 encoded text string for the current
-  // choice.
-  // NOTE: Unlike LTRResultIterator::GetUTF8Text, the return points to an
-  // internal structure and should NOT be delete[]ed to free after use.
-  const char *GetUTF8Text() const;
-
-  // Returns the confidence of the current choice depending on the used language
-  // data. If only LSTM traineddata is used the value range is 0.0f - 1.0f. All
-  // choices for one symbol should roughly add up to 1.0f.
-  // If only traineddata of the legacy engine is used, the number should be
-  // interpreted as a percent probability. (0.0f-100.0f) In this case
-  // probabilities won't add up to 100. Each one stands on its own.
-  float Confidence() const;
-
-  // Returns a vector containing all timesteps, which belong to the currently
-  // selected symbol. A timestep is a vector containing pairs of symbols and
-  // floating point numbers. The number states the probability for the
-  // corresponding symbol.
-  std::vector<std::vector<std::pair<const char *, float>>> *Timesteps() const;
-
-private:
-  // clears the remaining spaces out of the results and adapt the probabilities
-  void filterSpaces();
-  // Pointer to the WERD_RES object owned by the API.
-  WERD_RES *word_res_;
-  // Iterator over the blob choices.
-  BLOB_CHOICE_IT *choice_it_;
-  std::vector<std::pair<const char *, float>> *LSTM_choices_ = nullptr;
-  std::vector<std::pair<const char *, float>>::iterator LSTM_choice_it_;
-
-  const int *tstep_index_;
-  // regulates the rating granularity
-  double rating_coefficient_;
-  // leading blanks
-  int blanks_before_word_;
-  // true when there is lstm engine related trained data
-  bool oemLSTM_;
-};
-
-} // namespace tesseract.
-
-#endif // TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H_
--- a/third_party/ocr/tesseract-ocr/uos/mips64/include/tesseract/ocrclass.h
+++ b/third_party/ocr/tesseract-ocr/uos/mips64/include/tesseract/ocrclass.h
@ -1,158 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-/**********************************************************************
- * File:        ocrclass.h
- * Description: Class definitions and constants for the OCR API.
- * Author:      Hewlett-Packard Co
- *
- * (C) Copyright 1996, Hewlett-Packard Co.
- ** Licensed under the Apache License, Version 2.0 (the "License");
- ** you may not use this file except in compliance with the License.
- ** You may obtain a copy of the License at
- ** http://www.apache.org/licenses/LICENSE-2.0
- ** Unless required by applicable law or agreed to in writing, software
- ** distributed under the License is distributed on an "AS IS" BASIS,
- ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- ** See the License for the specific language governing permissions and
- ** limitations under the License.
- *
- **********************************************************************/
-
-/**********************************************************************
- * This file contains typedefs for all the structures used by
- * the HP OCR interface.
- * The structures are designed to allow them to be used with any
- * structure alignment up to 8.
- **********************************************************************/
-
-#ifndef CCUTIL_OCRCLASS_H_
-#define CCUTIL_OCRCLASS_H_
-
-#include <chrono>
-#include <ctime>
-
-namespace tesseract {
-
-/**********************************************************************
- * EANYCODE_CHAR
- * Description of a single character. The character code is defined by
- * the character set of the current font.
- * Output text is sent as an array of these structures.
- * Spaces and line endings in the output are represented in the
- * structures of the surrounding characters. They are not directly
- * represented as characters.
- * The first character in a word has a positive value of blanks.
- * Missing information should be set to the defaults in the comments.
- * If word bounds are known, but not character bounds, then the top and
- * bottom of each character should be those of the word. The left of the
- * first and right of the last char in each word should be set. All other
- * lefts and rights should be set to -1.
- * If set, the values of right and bottom are left+width and top+height.
- * Most of the members come directly from the parameters to ocr_append_char.
- * The formatting member uses the enhancement parameter and combines the
- * line direction stuff into the top 3 bits.
- * The coding is 0=RL char, 1=LR char, 2=DR NL, 3=UL NL, 4=DR Para,
- * 5=UL Para, 6=TB char, 7=BT char. API users do not need to know what
- * the coding is, only that it is backwards compatible with the previous
- * version.
- **********************************************************************/
-
-struct EANYCODE_CHAR { /*single character */
-  // It should be noted that the format for char_code for version 2.0 and beyond
-  // is UTF8 which means that ASCII characters will come out as one structure
-  // but other characters will be returned in two or more instances of this
-  // structure with a single byte of the  UTF8 code in each, but each will have
-  // the same bounding box. Programs which want to handle languagues with
-  // different characters sets will need to handle extended characters
-  // appropriately, but *all* code needs to be prepared to receive UTF8 coded
-  // characters for characters such as bullet and fancy quotes.
-  uint16_t char_code; /*character itself */
-  int16_t left;       /*of char (-1) */
-  int16_t right;      /*of char (-1) */
-  int16_t top;        /*of char (-1) */
-  int16_t bottom;     /*of char (-1) */
-  int16_t font_index; /*what font (0) */
-  uint8_t confidence; /*0=perfect, 100=reject (0/100) */
-  uint8_t point_size; /*of char, 72=i inch, (10) */
-  int8_t blanks;      /*no of spaces before this char (1) */
-  uint8_t formatting; /*char formatting (0) */
-};
-
-/**********************************************************************
- * ETEXT_DESC
- * Description of the output of the OCR engine.
- * This structure is used as both a progress monitor and the final
- * output header, since it needs to be a valid progress monitor while
- * the OCR engine is storing its output to shared memory.
- * During progress, all the buffer info is -1.
- * Progress starts at 0 and increases to 100 during OCR. No other constraint.
- * Additionally the progress callback contains the bounding box of the word that
- * is currently being processed.
- * Every progress callback, the OCR engine must set ocr_alive to 1.
- * The HP side will set ocr_alive to 0. Repeated failure to reset
- * to 1 indicates that the OCR engine is dead.
- * If the cancel function is not null then it is called with the number of
- * user words found. If it returns true then operation is cancelled.
- **********************************************************************/
-class ETEXT_DESC;
-
-using CANCEL_FUNC = bool (*)(void *, int);
-using PROGRESS_FUNC = bool (*)(int, int, int, int, int);
-using PROGRESS_FUNC2 = bool (*)(ETEXT_DESC *, int, int, int, int);
-
-class ETEXT_DESC { // output header
-public:
-  int16_t count{0};    /// chars in this buffer(0)
-  int16_t progress{0}; /// percent complete increasing (0-100)
-  /** Progress monitor covers word recognition and it does not cover layout
-   * analysis.
-   * See Ray comment in https://github.com/tesseract-ocr/tesseract/pull/27 */
-  int8_t more_to_come{0};       /// true if not last
-  volatile int8_t ocr_alive{0}; /// ocr sets to 1, HP 0
-  int8_t err_code{0};           /// for errcode use
-  CANCEL_FUNC cancel{nullptr};  /// returns true to cancel
-  PROGRESS_FUNC progress_callback{
-      nullptr};                      /// called whenever progress increases
-  PROGRESS_FUNC2 progress_callback2; /// monitor-aware progress callback
-  void *cancel_this{nullptr};        /// this or other data for cancel
-  std::chrono::steady_clock::time_point end_time;
-  /// Time to stop. Expected to be set only
-  /// by call to set_deadline_msecs().
-  EANYCODE_CHAR text[1]{}; /// character data
-
-  ETEXT_DESC() : progress_callback2(&default_progress_func) {
-    end_time = std::chrono::time_point<std::chrono::steady_clock,
-                                       std::chrono::milliseconds>();
-  }
-
-  // Sets the end time to be deadline_msecs milliseconds from now.
-  void set_deadline_msecs(int32_t deadline_msecs) {
-    if (deadline_msecs > 0) {
-      end_time = std::chrono::steady_clock::now() +
-                 std::chrono::milliseconds(deadline_msecs);
-    }
-  }
-
-  // Returns false if we've not passed the end_time, or have not set a deadline.
-  bool deadline_exceeded() const {
-    if (end_time.time_since_epoch() ==
-        std::chrono::steady_clock::duration::zero()) {
-      return false;
-    }
-    auto now = std::chrono::steady_clock::now();
-    return (now > end_time);
-  }
-
-private:
-  static bool default_progress_func(ETEXT_DESC *ths, int left, int right,
-                                    int top, int bottom) {
-    if (ths->progress_callback != nullptr) {
-      return (*(ths->progress_callback))(ths->progress, left, right, top,
-                                         bottom);
-    }
-    return true;
-  }
-};
-
-} // namespace tesseract
-
-#endif // CCUTIL_OCRCLASS_H_
--- a/third_party/ocr/tesseract-ocr/uos/mips64/include/tesseract/osdetect.h
+++ b/third_party/ocr/tesseract-ocr/uos/mips64/include/tesseract/osdetect.h
@ -1,139 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// File:        osdetect.h
-// Description: Orientation and script detection.
-// Author:      Samuel Charron
-//              Ranjith Unnikrishnan
-//
-// (C) Copyright 2008, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef TESSERACT_CCMAIN_OSDETECT_H_
-#define TESSERACT_CCMAIN_OSDETECT_H_
-
-#include "export.h" // for TESS_API
-
-#include <vector> // for std::vector
-
-namespace tesseract {
-
-class BLOBNBOX;
-class BLOBNBOX_CLIST;
-class BLOB_CHOICE_LIST;
-class TO_BLOCK_LIST;
-class UNICHARSET;
-
-class Tesseract;
-
-// Max number of scripts in ICU + "NULL" + Japanese and Korean + Fraktur
-const int kMaxNumberOfScripts = 116 + 1 + 2 + 1;
-
-struct OSBestResult {
-  OSBestResult()
-      : orientation_id(0), script_id(0), sconfidence(0.0), oconfidence(0.0) {}
-  int orientation_id;
-  int script_id;
-  float sconfidence;
-  float oconfidence;
-};
-
-struct OSResults {
-  OSResults() : unicharset(nullptr) {
-    for (int i = 0; i < 4; ++i) {
-      for (int j = 0; j < kMaxNumberOfScripts; ++j) {
-        scripts_na[i][j] = 0;
-      }
-      orientations[i] = 0;
-    }
-  }
-  void update_best_orientation();
-  // Set the estimate of the orientation to the given id.
-  void set_best_orientation(int orientation_id);
-  // Update/Compute the best estimate of the script assuming the given
-  // orientation id.
-  void update_best_script(int orientation_id);
-  // Return the index of the script with the highest score for this orientation.
-  TESS_API int get_best_script(int orientation_id) const;
-  // Accumulate scores with given OSResults instance and update the best script.
-  void accumulate(const OSResults &osr);
-
-  // Print statistics.
-  void print_scores(void) const;
-  void print_scores(int orientation_id) const;
-
-  // Array holding scores for each orientation id [0,3].
-  // Orientation ids [0..3] map to [0, 270, 180, 90] degree orientations of the
-  // page respectively, where the values refer to the amount of clockwise
-  // rotation to be applied to the page for the text to be upright and readable.
-  float orientations[4];
-  // Script confidence scores for each of 4 possible orientations.
-  float scripts_na[4][kMaxNumberOfScripts];
-
-  UNICHARSET *unicharset;
-  OSBestResult best_result;
-};
-
-class OrientationDetector {
-public:
-  OrientationDetector(const std::vector<int> *allowed_scripts,
-                      OSResults *results);
-  bool detect_blob(BLOB_CHOICE_LIST *scores);
-  int get_orientation();
-
-private:
-  OSResults *osr_;
-  const std::vector<int> *allowed_scripts_;
-};
-
-class ScriptDetector {
-public:
-  ScriptDetector(const std::vector<int> *allowed_scripts, OSResults *osr,
-                 tesseract::Tesseract *tess);
-  void detect_blob(BLOB_CHOICE_LIST *scores);
-  bool must_stop(int orientation) const;
-
-private:
-  OSResults *osr_;
-  static const char *korean_script_;
-  static const char *japanese_script_;
-  static const char *fraktur_script_;
-  int korean_id_;
-  int japanese_id_;
-  int katakana_id_;
-  int hiragana_id_;
-  int han_id_;
-  int hangul_id_;
-  int latin_id_;
-  int fraktur_id_;
-  tesseract::Tesseract *tess_;
-  const std::vector<int> *allowed_scripts_;
-};
-
-int orientation_and_script_detection(const char *filename, OSResults *,
-                                     tesseract::Tesseract *);
-
-int os_detect(TO_BLOCK_LIST *port_blocks, OSResults *osr,
-              tesseract::Tesseract *tess);
-
-int os_detect_blobs(const std::vector<int> *allowed_scripts,
-                    BLOBNBOX_CLIST *blob_list, OSResults *osr,
-                    tesseract::Tesseract *tess);
-
-bool os_detect_blob(BLOBNBOX *bbox, OrientationDetector *o, ScriptDetector *s,
-                    OSResults *, tesseract::Tesseract *tess);
-
-// Helper method to convert an orientation index to its value in degrees.
-// The value represents the amount of clockwise rotation in degrees that must be
-// applied for the text to be upright (readable).
-TESS_API int OrientationIdToValue(const int &id);
-
-} // namespace tesseract
-
-#endif // TESSERACT_CCMAIN_OSDETECT_H_
--- a/third_party/ocr/tesseract-ocr/uos/mips64/include/tesseract/pageiterator.h
+++ b/third_party/ocr/tesseract-ocr/uos/mips64/include/tesseract/pageiterator.h
@ -1,364 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// File:        pageiterator.h
-// Description: Iterator for tesseract page structure that avoids using
-//              tesseract internal data structures.
-// Author:      Ray Smith
-//
-// (C) Copyright 2010, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef TESSERACT_CCMAIN_PAGEITERATOR_H_
-#define TESSERACT_CCMAIN_PAGEITERATOR_H_
-
-#include "export.h"
-#include "publictypes.h"
-
-struct Pix;
-struct Pta;
-
-namespace tesseract {
-
-struct BlamerBundle;
-class C_BLOB_IT;
-class PAGE_RES;
-class PAGE_RES_IT;
-class WERD;
-
-class Tesseract;
-
-/**
- * Class to iterate over tesseract page structure, providing access to all
- * levels of the page hierarchy, without including any tesseract headers or
- * having to handle any tesseract structures.
- * WARNING! This class points to data held within the TessBaseAPI class, and
- * therefore can only be used while the TessBaseAPI class still exists and
- * has not been subjected to a call of Init, SetImage, Recognize, Clear, End
- * DetectOS, or anything else that changes the internal PAGE_RES.
- * See tesseract/publictypes.h for the definition of PageIteratorLevel.
- * See also ResultIterator, derived from PageIterator, which adds in the
- * ability to access OCR output with text-specific methods.
- */
-
-class TESS_API PageIterator {
-public:
-  /**
-   * page_res and tesseract come directly from the BaseAPI.
-   * The rectangle parameters are copied indirectly from the Thresholder,
-   * via the BaseAPI. They represent the coordinates of some rectangle in an
-   * original image (in top-left-origin coordinates) and therefore the top-left
-   * needs to be added to any output boxes in order to specify coordinates
-   * in the original image. See TessBaseAPI::SetRectangle.
-   * The scale and scaled_yres are in case the Thresholder scaled the image
-   * rectangle prior to thresholding. Any coordinates in tesseract's image
-   * must be divided by scale before adding (rect_left, rect_top).
-   * The scaled_yres indicates the effective resolution of the binary image
-   * that tesseract has been given by the Thresholder.
-   * After the constructor, Begin has already been called.
-   */
-  PageIterator(PAGE_RES *page_res, Tesseract *tesseract, int scale,
-               int scaled_yres, int rect_left, int rect_top, int rect_width,
-               int rect_height);
-  virtual ~PageIterator();
-
-  /**
-   * Page/ResultIterators may be copied! This makes it possible to iterate over
-   * all the objects at a lower level, while maintaining an iterator to
-   * objects at a higher level. These constructors DO NOT CALL Begin, so
-   * iterations will continue from the location of src.
-   */
-  PageIterator(const PageIterator &src);
-  const PageIterator &operator=(const PageIterator &src);
-
-  /** Are we positioned at the same location as other? */
-  bool PositionedAtSameWord(const PAGE_RES_IT *other) const;
-
-  // ============= Moving around within the page ============.
-
-  /**
-   * Moves the iterator to point to the start of the page to begin an
-   * iteration.
-   */
-  virtual void Begin();
-
-  /**
-   * Moves the iterator to the beginning of the paragraph.
-   * This class implements this functionality by moving it to the zero indexed
-   * blob of the first (leftmost) word on the first row of the paragraph.
-   */
-  virtual void RestartParagraph();
-
-  /**
-   * Return whether this iterator points anywhere in the first textline of a
-   * paragraph.
-   */
-  bool IsWithinFirstTextlineOfParagraph() const;
-
-  /**
-   * Moves the iterator to the beginning of the text line.
-   * This class implements this functionality by moving it to the zero indexed
-   * blob of the first (leftmost) word of the row.
-   */
-  virtual void RestartRow();
-
-  /**
-   * Moves to the start of the next object at the given level in the
-   * page hierarchy, and returns false if the end of the page was reached.
-   * NOTE that RIL_SYMBOL will skip non-text blocks, but all other
-   * PageIteratorLevel level values will visit each non-text block once.
-   * Think of non text blocks as containing a single para, with a single line,
-   * with a single imaginary word.
-   * Calls to Next with different levels may be freely intermixed.
-   * This function iterates words in right-to-left scripts correctly, if
-   * the appropriate language has been loaded into Tesseract.
-   */
-  virtual bool Next(PageIteratorLevel level);
-
-  /**
-   * Returns true if the iterator is at the start of an object at the given
-   * level.
-   *
-   * For instance, suppose an iterator it is pointed to the first symbol of the
-   * first word of the third line of the second paragraph of the first block in
-   * a page, then:
-   *   it.IsAtBeginningOf(RIL_BLOCK) = false
-   *   it.IsAtBeginningOf(RIL_PARA) = false
-   *   it.IsAtBeginningOf(RIL_TEXTLINE) = true
-   *   it.IsAtBeginningOf(RIL_WORD) = true
-   *   it.IsAtBeginningOf(RIL_SYMBOL) = true
-   */
-  virtual bool IsAtBeginningOf(PageIteratorLevel level) const;
-
-  /**
-   * Returns whether the iterator is positioned at the last element in a
-   * given level. (e.g. the last word in a line, the last line in a block)
-   *
-   *     Here's some two-paragraph example
-   *   text.  It starts off innocuously
-   *   enough but quickly turns bizarre.
-   *     The author inserts a cornucopia
-   *   of words to guard against confused
-   *   references.
-   *
-   * Now take an iterator it pointed to the start of "bizarre."
-   *  it.IsAtFinalElement(RIL_PARA, RIL_SYMBOL) = false
-   *  it.IsAtFinalElement(RIL_PARA, RIL_WORD) = true
-   *  it.IsAtFinalElement(RIL_BLOCK, RIL_WORD) = false
-   */
-  virtual bool IsAtFinalElement(PageIteratorLevel level,
-                                PageIteratorLevel element) const;
-
-  /**
-   * Returns whether this iterator is positioned
-   *   before other:   -1
-   *   equal to other:  0
-   *   after other:     1
-   */
-  int Cmp(const PageIterator &other) const;
-
-  // ============= Accessing data ==============.
-  // Coordinate system:
-  // Integer coordinates are at the cracks between the pixels.
-  // The top-left corner of the top-left pixel in the image is at (0,0).
-  // The bottom-right corner of the bottom-right pixel in the image is at
-  // (width, height).
-  // Every bounding box goes from the top-left of the top-left contained
-  // pixel to the bottom-right of the bottom-right contained pixel, so
-  // the bounding box of the single top-left pixel in the image is:
-  // (0,0)->(1,1).
-  // If an image rectangle has been set in the API, then returned coordinates
-  // relate to the original (full) image, rather than the rectangle.
-
-  /**
-   * Controls what to include in a bounding box. Bounding boxes of all levels
-   * between RIL_WORD and RIL_BLOCK can include or exclude potential diacritics.
-   * Between layout analysis and recognition, it isn't known where all
-   * diacritics belong, so this control is used to include or exclude some
-   * diacritics that are above or below the main body of the word. In most cases
-   * where the placement is obvious, and after recognition, it doesn't make as
-   * much difference, as the diacritics will already be included in the word.
-   */
-  void SetBoundingBoxComponents(bool include_upper_dots,
-                                bool include_lower_dots) {
-    include_upper_dots_ = include_upper_dots;
-    include_lower_dots_ = include_lower_dots;
-  }
-
-  /**
-   * Returns the bounding rectangle of the current object at the given level.
-   * See comment on coordinate system above.
-   * Returns false if there is no such object at the current position.
-   * The returned bounding box is guaranteed to match the size and position
-   * of the image returned by GetBinaryImage, but may clip foreground pixels
-   * from a grey image. The padding argument to GetImage can be used to expand
-   * the image to include more foreground pixels. See GetImage below.
-   */
-  bool BoundingBox(PageIteratorLevel level, int *left, int *top, int *right,
-                   int *bottom) const;
-  bool BoundingBox(PageIteratorLevel level, int padding, int *left, int *top,
-                   int *right, int *bottom) const;
-  /**
-   * Returns the bounding rectangle of the object in a coordinate system of the
-   * working image rectangle having its origin at (rect_left_, rect_top_) with
-   * respect to the original image and is scaled by a factor scale_.
-   */
-  bool BoundingBoxInternal(PageIteratorLevel level, int *left, int *top,
-                           int *right, int *bottom) const;
-
-  /** Returns whether there is no object of a given level. */
-  bool Empty(PageIteratorLevel level) const;
-
-  /**
-   * Returns the type of the current block.
-   * See tesseract/publictypes.h for PolyBlockType.
-   */
-  PolyBlockType BlockType() const;
-
-  /**
-   * Returns the polygon outline of the current block. The returned Pta must
-   * be ptaDestroy-ed after use. Note that the returned Pta lists the vertices
-   * of the polygon, and the last edge is the line segment between the last
-   * point and the first point. nullptr will be returned if the iterator is
-   * at the end of the document or layout analysis was not used.
-   */
-  Pta *BlockPolygon() const;
-
-  /**
-   * Returns a binary image of the current object at the given level.
-   * The position and size match the return from BoundingBoxInternal, and so
-   * this could be upscaled with respect to the original input image.
-   * Use pixDestroy to delete the image after use.
-   */
-  Pix *GetBinaryImage(PageIteratorLevel level) const;
-
-  /**
-   * Returns an image of the current object at the given level in greyscale
-   * if available in the input. To guarantee a binary image use BinaryImage.
-   * NOTE that in order to give the best possible image, the bounds are
-   * expanded slightly over the binary connected component, by the supplied
-   * padding, so the top-left position of the returned image is returned
-   * in (left,top). These will most likely not match the coordinates
-   * returned by BoundingBox.
-   * If you do not supply an original image, you will get a binary one.
-   * Use pixDestroy to delete the image after use.
-   */
-  Pix *GetImage(PageIteratorLevel level, int padding, Pix *original_img,
-                int *left, int *top) const;
-
-  /**
-   * Returns the baseline of the current object at the given level.
-   * The baseline is the line that passes through (x1, y1) and (x2, y2).
-   * WARNING: with vertical text, baselines may be vertical!
-   * Returns false if there is no baseline at the current position.
-   */
-  bool Baseline(PageIteratorLevel level, int *x1, int *y1, int *x2,
-                int *y2) const;
-
-  // Returns the attributes of the current row.
-  void RowAttributes(float *row_height, float *descenders,
-                     float *ascenders) const;
-
-  /**
-   * Returns orientation for the block the iterator points to.
-   *   orientation, writing_direction, textline_order: see publictypes.h
-   *   deskew_angle: after rotating the block so the text orientation is
-   *                 upright, how many radians does one have to rotate the
-   *                 block anti-clockwise for it to be level?
-   *                   -Pi/4 <= deskew_angle <= Pi/4
-   */
-  void Orientation(tesseract::Orientation *orientation,
-                   tesseract::WritingDirection *writing_direction,
-                   tesseract::TextlineOrder *textline_order,
-                   float *deskew_angle) const;
-
-  /**
-   * Returns information about the current paragraph, if available.
-   *
-   *   justification -
-   *     LEFT if ragged right, or fully justified and script is left-to-right.
-   *     RIGHT if ragged left, or fully justified and script is right-to-left.
-   *     unknown if it looks like source code or we have very few lines.
-   *   is_list_item -
-   *     true if we believe this is a member of an ordered or unordered list.
-   *   is_crown -
-   *     true if the first line of the paragraph is aligned with the other
-   *     lines of the paragraph even though subsequent paragraphs have first
-   *     line indents.  This typically indicates that this is the continuation
-   *     of a previous paragraph or that it is the very first paragraph in
-   *     the chapter.
-   *   first_line_indent -
-   *     For LEFT aligned paragraphs, the first text line of paragraphs of
-   *     this kind are indented this many pixels from the left edge of the
-   *     rest of the paragraph.
-   *     for RIGHT aligned paragraphs, the first text line of paragraphs of
-   *     this kind are indented this many pixels from the right edge of the
-   *     rest of the paragraph.
-   *     NOTE 1: This value may be negative.
-   *     NOTE 2: if *is_crown == true, the first line of this paragraph is
-   *             actually flush, and first_line_indent is set to the "common"
-   *             first_line_indent for subsequent paragraphs in this block
-   *             of text.
-   */
-  void ParagraphInfo(tesseract::ParagraphJustification *justification,
-                     bool *is_list_item, bool *is_crown,
-                     int *first_line_indent) const;
-
-  // If the current WERD_RES (it_->word()) is not nullptr, sets the BlamerBundle
-  // of the current word to the given pointer (takes ownership of the pointer)
-  // and returns true.
-  // Can only be used when iterating on the word level.
-  bool SetWordBlamerBundle(BlamerBundle *blamer_bundle);
-
-protected:
-  /**
-   * Sets up the internal data for iterating the blobs of a new word, then
-   * moves the iterator to the given offset.
-   */
-  void BeginWord(int offset);
-
-  /** Pointer to the page_res owned by the API. */
-  PAGE_RES *page_res_;
-  /** Pointer to the Tesseract object owned by the API. */
-  Tesseract *tesseract_;
-  /**
-   * The iterator to the page_res_. Owned by this ResultIterator.
-   * A pointer just to avoid dragging in Tesseract includes.
-   */
-  PAGE_RES_IT *it_;
-  /**
-   * The current input WERD being iterated. If there is an output from OCR,
-   * then word_ is nullptr. Owned by the API
-   */
-  WERD *word_;
-  /** The length of the current word_. */
-  int word_length_;
-  /** The current blob index within the word. */
-  int blob_index_;
-  /**
-   * Iterator to the blobs within the word. If nullptr, then we are iterating
-   * OCR results in the box_word.
-   * Owned by this ResultIterator.
-   */
-  C_BLOB_IT *cblob_it_;
-  /** Control over what to include in bounding boxes. */
-  bool include_upper_dots_;
-  bool include_lower_dots_;
-  /** Parameters saved from the Thresholder. Needed to rebuild coordinates.*/
-  int scale_;
-  int scaled_yres_;
-  int rect_left_;
-  int rect_top_;
-  int rect_width_;
-  int rect_height_;
-};
-
-} // namespace tesseract.
-
-#endif // TESSERACT_CCMAIN_PAGEITERATOR_H_
--- a/third_party/ocr/tesseract-ocr/uos/mips64/include/tesseract/publictypes.h
+++ b/third_party/ocr/tesseract-ocr/uos/mips64/include/tesseract/publictypes.h
@ -1,281 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// File:        publictypes.h
-// Description: Types used in both the API and internally
-// Author:      Ray Smith
-//
-// (C) Copyright 2010, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef TESSERACT_CCSTRUCT_PUBLICTYPES_H_
-#define TESSERACT_CCSTRUCT_PUBLICTYPES_H_
-
-namespace tesseract {
-
-// This file contains types that are used both by the API and internally
-// to Tesseract. In order to decouple the API from Tesseract and prevent cyclic
-// dependencies, THIS FILE SHOULD NOT DEPEND ON ANY OTHER PART OF TESSERACT.
-// Restated: It is OK for low-level Tesseract files to include publictypes.h,
-// but not for the low-level tesseract code to include top-level API code.
-// This file should not use other Tesseract types, as that would drag
-// their includes into the API-level.
-
-/** Number of printers' points in an inch. The unit of the pointsize return. */
-constexpr int kPointsPerInch = 72;
-/**
- * Minimum believable resolution. Used as a default if there is no other
- * information, as it is safer to under-estimate than over-estimate.
- */
-constexpr int kMinCredibleResolution = 70;
-/** Maximum believable resolution.  */
-constexpr int kMaxCredibleResolution = 2400;
-/**
- * Ratio between median blob size and likely resolution. Used to estimate
- * resolution when none is provided. This is basically 1/usual text size in
- * inches.  */
-constexpr int kResolutionEstimationFactor = 10;
-
-/**
- * Possible types for a POLY_BLOCK or ColPartition.
- * Must be kept in sync with kPBColors in polyblk.cpp and PTIs*Type functions
- * below, as well as kPolyBlockNames in layout_test.cc.
- * Used extensively by ColPartition, and POLY_BLOCK.
- */
-enum PolyBlockType {
-  PT_UNKNOWN,         // Type is not yet known. Keep as the first element.
-  PT_FLOWING_TEXT,    // Text that lives inside a column.
-  PT_HEADING_TEXT,    // Text that spans more than one column.
-  PT_PULLOUT_TEXT,    // Text that is in a cross-column pull-out region.
-  PT_EQUATION,        // Partition belonging to an equation region.
-  PT_INLINE_EQUATION, // Partition has inline equation.
-  PT_TABLE,           // Partition belonging to a table region.
-  PT_VERTICAL_TEXT,   // Text-line runs vertically.
-  PT_CAPTION_TEXT,    // Text that belongs to an image.
-  PT_FLOWING_IMAGE,   // Image that lives inside a column.
-  PT_HEADING_IMAGE,   // Image that spans more than one column.
-  PT_PULLOUT_IMAGE,   // Image that is in a cross-column pull-out region.
-  PT_HORZ_LINE,       // Horizontal Line.
-  PT_VERT_LINE,       // Vertical Line.
-  PT_NOISE,           // Lies outside of any column.
-  PT_COUNT
-};
-
-/** Returns true if PolyBlockType is of horizontal line type */
-inline bool PTIsLineType(PolyBlockType type) {
-  return type == PT_HORZ_LINE || type == PT_VERT_LINE;
-}
-/** Returns true if PolyBlockType is of image type */
-inline bool PTIsImageType(PolyBlockType type) {
-  return type == PT_FLOWING_IMAGE || type == PT_HEADING_IMAGE ||
-         type == PT_PULLOUT_IMAGE;
-}
-/** Returns true if PolyBlockType is of text type */
-inline bool PTIsTextType(PolyBlockType type) {
-  return type == PT_FLOWING_TEXT || type == PT_HEADING_TEXT ||
-         type == PT_PULLOUT_TEXT || type == PT_TABLE ||
-         type == PT_VERTICAL_TEXT || type == PT_CAPTION_TEXT ||
-         type == PT_INLINE_EQUATION;
-}
-// Returns true if PolyBlockType is of pullout(inter-column) type
-inline bool PTIsPulloutType(PolyBlockType type) {
-  return type == PT_PULLOUT_IMAGE || type == PT_PULLOUT_TEXT;
-}
-
-/**
- *  +------------------+  Orientation Example:
- *  | 1 Aaaa Aaaa Aaaa |  ====================
- *  | Aaa aa aaa aa    |  To left is a diagram of some (1) English and
- *  | aaaaaa A aa aaa. |  (2) Chinese text and a (3) photo credit.
- *  |                2 |
- *  |   #######  c c C |  Upright Latin characters are represented as A and a.
- *  |   #######  c c c |  '<' represents a latin character rotated
- *  | < #######  c c c |      anti-clockwise 90 degrees.
- *  | < #######  c   c |
- *  | < #######  .   c |  Upright Chinese characters are represented C and c.
- *  | 3 #######      c |
- *  +------------------+  NOTA BENE: enum values here should match goodoc.proto
-
- * If you orient your head so that "up" aligns with Orientation,
- * then the characters will appear "right side up" and readable.
- *
- * In the example above, both the English and Chinese paragraphs are oriented
- * so their "up" is the top of the page (page up).  The photo credit is read
- * with one's head turned leftward ("up" is to page left).
- *
- * The values of this enum match the convention of Tesseract's osdetect.h
-*/
-enum Orientation {
-  ORIENTATION_PAGE_UP = 0,
-  ORIENTATION_PAGE_RIGHT = 1,
-  ORIENTATION_PAGE_DOWN = 2,
-  ORIENTATION_PAGE_LEFT = 3,
-};
-
-/**
- * The grapheme clusters within a line of text are laid out logically
- * in this direction, judged when looking at the text line rotated so that
- * its Orientation is "page up".
- *
- * For English text, the writing direction is left-to-right.  For the
- * Chinese text in the above example, the writing direction is top-to-bottom.
- */
-enum WritingDirection {
-  WRITING_DIRECTION_LEFT_TO_RIGHT = 0,
-  WRITING_DIRECTION_RIGHT_TO_LEFT = 1,
-  WRITING_DIRECTION_TOP_TO_BOTTOM = 2,
-};
-
-/**
- * The text lines are read in the given sequence.
- *
- * In English, the order is top-to-bottom.
- * In Chinese, vertical text lines are read right-to-left.  Mongolian is
- * written in vertical columns top to bottom like Chinese, but the lines
- * order left-to right.
- *
- * Note that only some combinations make sense.  For example,
- * WRITING_DIRECTION_LEFT_TO_RIGHT implies TEXTLINE_ORDER_TOP_TO_BOTTOM
- */
-enum TextlineOrder {
-  TEXTLINE_ORDER_LEFT_TO_RIGHT = 0,
-  TEXTLINE_ORDER_RIGHT_TO_LEFT = 1,
-  TEXTLINE_ORDER_TOP_TO_BOTTOM = 2,
-};
-
-/**
- * Possible modes for page layout analysis. These *must* be kept in order
- * of decreasing amount of layout analysis to be done, except for OSD_ONLY,
- * so that the inequality test macros below work.
- */
-enum PageSegMode {
-  PSM_OSD_ONLY = 0,      ///< Orientation and script detection only.
-  PSM_AUTO_OSD = 1,      ///< Automatic page segmentation with orientation and
-                         ///< script detection. (OSD)
-  PSM_AUTO_ONLY = 2,     ///< Automatic page segmentation, but no OSD, or OCR.
-  PSM_AUTO = 3,          ///< Fully automatic page segmentation, but no OSD.
-  PSM_SINGLE_COLUMN = 4, ///< Assume a single column of text of variable sizes.
-  PSM_SINGLE_BLOCK_VERT_TEXT = 5, ///< Assume a single uniform block of
-                                  ///< vertically aligned text.
-  PSM_SINGLE_BLOCK = 6, ///< Assume a single uniform block of text. (Default.)
-  PSM_SINGLE_LINE = 7,  ///< Treat the image as a single text line.
-  PSM_SINGLE_WORD = 8,  ///< Treat the image as a single word.
-  PSM_CIRCLE_WORD = 9,  ///< Treat the image as a single word in a circle.
-  PSM_SINGLE_CHAR = 10, ///< Treat the image as a single character.
-  PSM_SPARSE_TEXT =
-      11, ///< Find as much text as possible in no particular order.
-  PSM_SPARSE_TEXT_OSD = 12, ///< Sparse text with orientation and script det.
-  PSM_RAW_LINE = 13, ///< Treat the image as a single text line, bypassing
-                     ///< hacks that are Tesseract-specific.
-
-  PSM_COUNT ///< Number of enum entries.
-};
-
-/**
- * Inline functions that act on a PageSegMode to determine whether components of
- * layout analysis are enabled.
- * *Depend critically on the order of elements of PageSegMode.*
- * NOTE that arg is an int for compatibility with INT_PARAM.
- */
-inline bool PSM_OSD_ENABLED(int pageseg_mode) {
-  return pageseg_mode <= PSM_AUTO_OSD || pageseg_mode == PSM_SPARSE_TEXT_OSD;
-}
-inline bool PSM_ORIENTATION_ENABLED(int pageseg_mode) {
-  return pageseg_mode <= PSM_AUTO || pageseg_mode == PSM_SPARSE_TEXT_OSD;
-}
-inline bool PSM_COL_FIND_ENABLED(int pageseg_mode) {
-  return pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_AUTO;
-}
-inline bool PSM_SPARSE(int pageseg_mode) {
-  return pageseg_mode == PSM_SPARSE_TEXT || pageseg_mode == PSM_SPARSE_TEXT_OSD;
-}
-inline bool PSM_BLOCK_FIND_ENABLED(int pageseg_mode) {
-  return pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_SINGLE_COLUMN;
-}
-inline bool PSM_LINE_FIND_ENABLED(int pageseg_mode) {
-  return pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_SINGLE_BLOCK;
-}
-inline bool PSM_WORD_FIND_ENABLED(int pageseg_mode) {
-  return (pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_SINGLE_LINE) ||
-         pageseg_mode == PSM_SPARSE_TEXT || pageseg_mode == PSM_SPARSE_TEXT_OSD;
-}
-
-/**
- * enum of the elements of the page hierarchy, used in ResultIterator
- * to provide functions that operate on each level without having to
- * have 5x as many functions.
- */
-enum PageIteratorLevel {
-  RIL_BLOCK,    // Block of text/image/separator line.
-  RIL_PARA,     // Paragraph within a block.
-  RIL_TEXTLINE, // Line within a paragraph.
-  RIL_WORD,     // Word within a textline.
-  RIL_SYMBOL    // Symbol/character within a word.
-};
-
-/**
- * JUSTIFICATION_UNKNOWN
- *   The alignment is not clearly one of the other options.  This could happen
- *   for example if there are only one or two lines of text or the text looks
- *   like source code or poetry.
- *
- * NOTA BENE: Fully justified paragraphs (text aligned to both left and right
- *    margins) are marked by Tesseract with JUSTIFICATION_LEFT if their text
- *    is written with a left-to-right script and with JUSTIFICATION_RIGHT if
- *    their text is written in a right-to-left script.
- *
- * Interpretation for text read in vertical lines:
- *   "Left" is wherever the starting reading position is.
- *
- * JUSTIFICATION_LEFT
- *   Each line, except possibly the first, is flush to the same left tab stop.
- *
- * JUSTIFICATION_CENTER
- *   The text lines of the paragraph are centered about a line going
- *   down through their middle of the text lines.
- *
- * JUSTIFICATION_RIGHT
- *   Each line, except possibly the first, is flush to the same right tab stop.
- */
-enum ParagraphJustification {
-  JUSTIFICATION_UNKNOWN,
-  JUSTIFICATION_LEFT,
-  JUSTIFICATION_CENTER,
-  JUSTIFICATION_RIGHT,
-};
-
-/**
- * When Tesseract/Cube is initialized we can choose to instantiate/load/run
- * only the Tesseract part, only the Cube part or both along with the combiner.
- * The preference of which engine to use is stored in tessedit_ocr_engine_mode.
- *
- * ATTENTION: When modifying this enum, please make sure to make the
- * appropriate changes to all the enums mirroring it (e.g. OCREngine in
- * cityblock/workflow/detection/detection_storage.proto). Such enums will
- * mention the connection to OcrEngineMode in the comments.
- */
-enum OcrEngineMode {
-  OEM_TESSERACT_ONLY,          // Run Tesseract only - fastest; deprecated
-  OEM_LSTM_ONLY,               // Run just the LSTM line recognizer.
-  OEM_TESSERACT_LSTM_COMBINED, // Run the LSTM recognizer, but allow fallback
-                               // to Tesseract when things get difficult.
-                               // deprecated
-  OEM_DEFAULT,                 // Specify this mode when calling init_*(),
-                               // to indicate that any of the above modes
-                               // should be automatically inferred from the
-                               // variables in the language-specific config,
-                               // command-line configs, or if not specified
-                               // in any of the above should be set to the
-                               // default OEM_TESSERACT_ONLY.
-  OEM_COUNT                    // Number of OEMs
-};
-
-} // namespace tesseract.
-
-#endif // TESSERACT_CCSTRUCT_PUBLICTYPES_H_
--- a/third_party/ocr/tesseract-ocr/uos/mips64/include/tesseract/renderer.h
+++ b/third_party/ocr/tesseract-ocr/uos/mips64/include/tesseract/renderer.h
@ -1,311 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// File:        renderer.h
-// Description: Rendering interface to inject into TessBaseAPI
-//
-// (C) Copyright 2011, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef TESSERACT_API_RENDERER_H_
-#define TESSERACT_API_RENDERER_H_
-
-#include "export.h"
-
-// To avoid collision with other typenames include the ABSOLUTE MINIMUM
-// complexity of includes here. Use forward declarations wherever possible
-// and hide includes of complex types in baseapi.cpp.
-#include <cstdint>
-#include <string> // for std::string
-#include <vector> // for std::vector
-
-struct Pix;
-
-namespace tesseract {
-
-class TessBaseAPI;
-
-/**
- * Interface for rendering tesseract results into a document, such as text,
- * HOCR or pdf. This class is abstract. Specific classes handle individual
- * formats. This interface is then used to inject the renderer class into
- * tesseract when processing images.
- *
- * For simplicity implementing this with tesseract version 3.01,
- * the renderer contains document state that is cleared from document
- * to document just as the TessBaseAPI is. This way the base API can just
- * delegate its rendering functionality to injected renderers, and the
- * renderers can manage the associated state needed for the specific formats
- * in addition to the heuristics for producing it.
- */
-class TESS_API TessResultRenderer {
-public:
-  virtual ~TessResultRenderer();
-
-  // Takes ownership of pointer so must be new'd instance.
-  // Renderers aren't ordered, but appends the sequences of next parameter
-  // and existing next(). The renderers should be unique across both lists.
-  void insert(TessResultRenderer *next);
-
-  // Returns the next renderer or nullptr.
-  TessResultRenderer *next() {
-    return next_;
-  }
-
-  /**
-   * Starts a new document with the given title.
-   * This clears the contents of the output data.
-   * Title should use UTF-8 encoding.
-   */
-  bool BeginDocument(const char *title);
-
-  /**
-   * Adds the recognized text from the source image to the current document.
-   * Invalid if BeginDocument not yet called.
-   *
-   * Note that this API is a bit weird but is designed to fit into the
-   * current TessBaseAPI implementation where the api has lots of state
-   * information that we might want to add in.
-   */
-  bool AddImage(TessBaseAPI *api);
-
-  /**
-   * Finishes the document and finalizes the output data
-   * Invalid if BeginDocument not yet called.
-   */
-  bool EndDocument();
-
-  const char *file_extension() const {
-    return file_extension_;
-  }
-  const char *title() const {
-    return title_.c_str();
-  }
-
-  // Is everything fine? Otherwise something went wrong.
-  bool happy() const {
-    return happy_;
-  }
-
-  /**
-   * Returns the index of the last image given to AddImage
-   * (i.e. images are incremented whether the image succeeded or not)
-   *
-   * This is always defined. It means either the number of the
-   * current image, the last image ended, or in the completed document
-   * depending on when in the document lifecycle you are looking at it.
-   * Will return -1 if a document was never started.
-   */
-  int imagenum() const {
-    return imagenum_;
-  }
-
-protected:
-  /**
-   * Called by concrete classes.
-   *
-   * outputbase is the name of the output file excluding
-   * extension. For example, "/path/to/chocolate-chip-cookie-recipe"
-   *
-   * extension indicates the file extension to be used for output
-   * files. For example "pdf" will produce a .pdf file, and "hocr"
-   * will produce .hocr files.
-   */
-  TessResultRenderer(const char *outputbase, const char *extension);
-
-  // Hook for specialized handling in BeginDocument()
-  virtual bool BeginDocumentHandler();
-
-  // This must be overridden to render the OCR'd results
-  virtual bool AddImageHandler(TessBaseAPI *api) = 0;
-
-  // Hook for specialized handling in EndDocument()
-  virtual bool EndDocumentHandler();
-
-  // Renderers can call this to append '\0' terminated strings into
-  // the output string returned by GetOutput.
-  // This method will grow the output buffer if needed.
-  void AppendString(const char *s);
-
-  // Renderers can call this to append binary byte sequences into
-  // the output string returned by GetOutput. Note that s is not necessarily
-  // '\0' terminated (and can contain '\0' within it).
-  // This method will grow the output buffer if needed.
-  void AppendData(const char *s, int len);
-
-private:
-  TessResultRenderer *next_;   // Can link multiple renderers together
-  FILE *fout_;                 // output file pointer
-  const char *file_extension_; // standard extension for generated output
-  std::string title_;          // title of document being rendered
-  int imagenum_;               // index of last image added
-  bool happy_;                 // I get grumpy when the disk fills up, etc.
-};
-
-/**
- * Renders tesseract output into a plain UTF-8 text string
- */
-class TESS_API TessTextRenderer : public TessResultRenderer {
-public:
-  explicit TessTextRenderer(const char *outputbase);
-
-protected:
-  bool AddImageHandler(TessBaseAPI *api) override;
-};
-
-/**
- * Renders tesseract output into an hocr text string
- */
-class TESS_API TessHOcrRenderer : public TessResultRenderer {
-public:
-  explicit TessHOcrRenderer(const char *outputbase, bool font_info);
-  explicit TessHOcrRenderer(const char *outputbase);
-
-protected:
-  bool BeginDocumentHandler() override;
-  bool AddImageHandler(TessBaseAPI *api) override;
-  bool EndDocumentHandler() override;
-
-private:
-  bool font_info_; // whether to print font information
-};
-
-/**
- * Renders tesseract output into an alto text string
- */
-class TESS_API TessAltoRenderer : public TessResultRenderer {
-public:
-  explicit TessAltoRenderer(const char *outputbase);
-
-protected:
-  bool BeginDocumentHandler() override;
-  bool AddImageHandler(TessBaseAPI *api) override;
-  bool EndDocumentHandler() override;
-
-private:
-  bool begin_document;
-};
-
-/**
- * Renders Tesseract output into a TSV string
- */
-class TESS_API TessTsvRenderer : public TessResultRenderer {
-public:
-  explicit TessTsvRenderer(const char *outputbase, bool font_info);
-  explicit TessTsvRenderer(const char *outputbase);
-
-protected:
-  bool BeginDocumentHandler() override;
-  bool AddImageHandler(TessBaseAPI *api) override;
-  bool EndDocumentHandler() override;
-
-private:
-  bool font_info_; // whether to print font information
-};
-
-/**
- * Renders tesseract output into searchable PDF
- */
-class TESS_API TessPDFRenderer : public TessResultRenderer {
-public:
-  // datadir is the location of the TESSDATA. We need it because
-  // we load a custom PDF font from this location.
-  TessPDFRenderer(const char *outputbase, const char *datadir,
-                  bool textonly = false);
-
-protected:
-  bool BeginDocumentHandler() override;
-  bool AddImageHandler(TessBaseAPI *api) override;
-  bool EndDocumentHandler() override;
-
-private:
-  // We don't want to have every image in memory at once,
-  // so we store some metadata as we go along producing
-  // PDFs one page at a time. At the end, that metadata is
-  // used to make everything that isn't easily handled in a
-  // streaming fashion.
-  long int obj_;                  // counter for PDF objects
-  std::vector<uint64_t> offsets_; // offset of every PDF object in bytes
-  std::vector<long int> pages_;   // object number for every /Page object
-  std::string datadir_;           // where to find the custom font
-  bool textonly_;                 // skip images if set
-  // Bookkeeping only. DIY = Do It Yourself.
-  void AppendPDFObjectDIY(size_t objectsize);
-  // Bookkeeping + emit data.
-  void AppendPDFObject(const char *data);
-  // Create the /Contents object for an entire page.
-  char *GetPDFTextObjects(TessBaseAPI *api, double width, double height);
-  // Turn an image into a PDF object. Only transcode if we have to.
-  static bool imageToPDFObj(Pix *pix, const char *filename, long int objnum,
-                            char **pdf_object, long int *pdf_object_size,
-                            int jpg_quality);
-};
-
-/**
- * Renders tesseract output into a plain UTF-8 text string
- */
-class TESS_API TessUnlvRenderer : public TessResultRenderer {
-public:
-  explicit TessUnlvRenderer(const char *outputbase);
-
-protected:
-  bool AddImageHandler(TessBaseAPI *api) override;
-};
-
-/**
- * Renders tesseract output into a plain UTF-8 text string for LSTMBox
- */
-class TESS_API TessLSTMBoxRenderer : public TessResultRenderer {
-public:
-  explicit TessLSTMBoxRenderer(const char *outputbase);
-
-protected:
-  bool AddImageHandler(TessBaseAPI *api) override;
-};
-
-/**
- * Renders tesseract output into a plain UTF-8 text string
- */
-class TESS_API TessBoxTextRenderer : public TessResultRenderer {
-public:
-  explicit TessBoxTextRenderer(const char *outputbase);
-
-protected:
-  bool AddImageHandler(TessBaseAPI *api) override;
-};
-
-/**
- * Renders tesseract output into a plain UTF-8 text string in WordStr format
- */
-class TESS_API TessWordStrBoxRenderer : public TessResultRenderer {
-public:
-  explicit TessWordStrBoxRenderer(const char *outputbase);
-
-protected:
-  bool AddImageHandler(TessBaseAPI *api) override;
-};
-
-#ifndef DISABLED_LEGACY_ENGINE
-
-/**
- * Renders tesseract output into an osd text string
- */
-class TESS_API TessOsdRenderer : public TessResultRenderer {
-public:
-  explicit TessOsdRenderer(const char *outputbase);
-
-protected:
-  bool AddImageHandler(TessBaseAPI *api) override;
-};
-
-#endif // ndef DISABLED_LEGACY_ENGINE
-
-} // namespace tesseract.
-
-#endif // TESSERACT_API_RENDERER_H_
--- a/third_party/ocr/tesseract-ocr/uos/mips64/include/tesseract/resultiterator.h
+++ b/third_party/ocr/tesseract-ocr/uos/mips64/include/tesseract/resultiterator.h
@ -1,250 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// File:        resultiterator.h
-// Description: Iterator for tesseract results that is capable of
-//              iterating in proper reading order over Bi Directional
-//              (e.g. mixed Hebrew and English) text.
-// Author:      David Eger
-//
-// (C) Copyright 2011, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef TESSERACT_CCMAIN_RESULT_ITERATOR_H_
-#define TESSERACT_CCMAIN_RESULT_ITERATOR_H_
-
-#include "export.h"            // for TESS_API, TESS_LOCAL
-#include "ltrresultiterator.h" // for LTRResultIterator
-#include "publictypes.h"       // for PageIteratorLevel
-#include "unichar.h"           // for StrongScriptDirection
-
-#include <set>    // for std::pair
-#include <vector> // for std::vector
-
-namespace tesseract {
-
-class TESS_API ResultIterator : public LTRResultIterator {
-public:
-  static ResultIterator *StartOfParagraph(const LTRResultIterator &resit);
-
-  /**
-   * ResultIterator is copy constructible!
-   * The default copy constructor works just fine for us.
-   */
-  ~ResultIterator() override = default;
-
-  // ============= Moving around within the page ============.
-  /**
-   * Moves the iterator to point to the start of the page to begin
-   * an iteration.
-   */
-  void Begin() override;
-
-  /**
-   * Moves to the start of the next object at the given level in the
-   * page hierarchy in the appropriate reading order and returns false if
-   * the end of the page was reached.
-   * NOTE that RIL_SYMBOL will skip non-text blocks, but all other
-   * PageIteratorLevel level values will visit each non-text block once.
-   * Think of non text blocks as containing a single para, with a single line,
-   * with a single imaginary word.
-   * Calls to Next with different levels may be freely intermixed.
-   * This function iterates words in right-to-left scripts correctly, if
-   * the appropriate language has been loaded into Tesseract.
-   */
-  bool Next(PageIteratorLevel level) override;
-
-  /**
-   * IsAtBeginningOf() returns whether we're at the logical beginning of the
-   * given level.  (as opposed to ResultIterator's left-to-right top-to-bottom
-   * order).  Otherwise, this acts the same as PageIterator::IsAtBeginningOf().
-   * For a full description, see pageiterator.h
-   */
-  bool IsAtBeginningOf(PageIteratorLevel level) const override;
-
-  /**
-   * Implement PageIterator's IsAtFinalElement correctly in a BiDi context.
-   * For instance, IsAtFinalElement(RIL_PARA, RIL_WORD) returns whether we
-   * point at the last word in a paragraph.  See PageIterator for full comment.
-   */
-  bool IsAtFinalElement(PageIteratorLevel level,
-                        PageIteratorLevel element) const override;
-
-  // ============= Functions that refer to words only ============.
-  // Returns the number of blanks before the current word.
-  int BlanksBeforeWord() const;
-
-  // ============= Accessing data ==============.
-
-  /**
-   * Returns the null terminated UTF-8 encoded text string for the current
-   * object at the given level. Use delete [] to free after use.
-   */
-  virtual char *GetUTF8Text(PageIteratorLevel level) const;
-
-  /**
-   * Returns the LSTM choices for every LSTM timestep for the current word.
-   */
-  virtual std::vector<std::vector<std::vector<std::pair<const char *, float>>>>
-      *GetRawLSTMTimesteps() const;
-  virtual std::vector<std::vector<std::pair<const char *, float>>>
-      *GetBestLSTMSymbolChoices() const;
-
-  /**
-   * Return whether the current paragraph's dominant reading direction
-   * is left-to-right (as opposed to right-to-left).
-   */
-  bool ParagraphIsLtr() const;
-
-  // ============= Exposed only for testing =============.
-
-  /**
-   * Yields the reading order as a sequence of indices and (optional)
-   * meta-marks for a set of words (given left-to-right).
-   * The meta marks are passed as negative values:
-   *   kMinorRunStart  Start of minor direction text.
-   *   kMinorRunEnd    End of minor direction text.
-   *   kComplexWord    The next indexed word contains both left-to-right and
-   *                    right-to-left characters and was treated as neutral.
-   *
-   * For example, suppose we have five words in a text line,
-   * indexed [0,1,2,3,4] from the leftmost side of the text line.
-   * The following are all believable reading_orders:
-   *
-   * Left-to-Right (in ltr paragraph):
-   *     { 0, 1, 2, 3, 4 }
-   * Left-to-Right (in rtl paragraph):
-   *     { kMinorRunStart, 0, 1, 2, 3, 4, kMinorRunEnd }
-   * Right-to-Left (in rtl paragraph):
-   *     { 4, 3, 2, 1, 0 }
-   * Left-to-Right except for an RTL phrase in words 2, 3 in an ltr paragraph:
-   *     { 0, 1, kMinorRunStart, 3, 2, kMinorRunEnd, 4 }
-   */
-  static void CalculateTextlineOrder(
-      bool paragraph_is_ltr,
-      const std::vector<StrongScriptDirection> &word_dirs,
-      std::vector<int> *reading_order);
-
-  static const int kMinorRunStart;
-  static const int kMinorRunEnd;
-  static const int kComplexWord;
-
-protected:
-  /**
-   * We presume the data associated with the given iterator will outlive us.
-   * NB: This is private because it does something that is non-obvious:
-   *   it resets to the beginning of the paragraph instead of staying wherever
-   *   resit might have pointed.
-   */
-  explicit ResultIterator(const LTRResultIterator &resit);
-
-private:
-  /**
-   * Calculates the current paragraph's dominant writing direction.
-   * Typically, members should use current_paragraph_ltr_ instead.
-   */
-  bool CurrentParagraphIsLtr() const;
-
-  /**
-   * Returns word indices as measured from resit->RestartRow() = index 0
-   * for the reading order of words within a textline given an iterator
-   * into the middle of the text line.
-   * In addition to non-negative word indices, the following negative values
-   * may be inserted:
-   *   kMinorRunStart  Start of minor direction text.
-   *   kMinorRunEnd    End of minor direction text.
-   *   kComplexWord    The previous word contains both left-to-right and
-   *                   right-to-left characters and was treated as neutral.
-   */
-  void CalculateTextlineOrder(bool paragraph_is_ltr,
-                              const LTRResultIterator &resit,
-                              std::vector<int> *indices) const;
-  /** Same as above, but the caller's ssd gets filled in if ssd != nullptr. */
-  void CalculateTextlineOrder(bool paragraph_is_ltr,
-                              const LTRResultIterator &resit,
-                              std::vector<StrongScriptDirection> *ssd,
-                              std::vector<int> *indices) const;
-
-  /**
-   * What is the index of the current word in a strict left-to-right reading
-   * of the row?
-   */
-  int LTRWordIndex() const;
-
-  /**
-   * Given an iterator pointing at a word, returns the logical reading order
-   * of blob indices for the word.
-   */
-  void CalculateBlobOrder(std::vector<int> *blob_indices) const;
-
-  /** Precondition: current_paragraph_is_ltr_ is set. */
-  void MoveToLogicalStartOfTextline();
-
-  /**
-   * Precondition: current_paragraph_is_ltr_ and in_minor_direction_
-   * are set.
-   */
-  void MoveToLogicalStartOfWord();
-
-  /** Are we pointing at the final (reading order) symbol of the word? */
-  bool IsAtFinalSymbolOfWord() const;
-
-  /** Are we pointing at the first (reading order) symbol of the word? */
-  bool IsAtFirstSymbolOfWord() const;
-
-  /**
-   * Append any extra marks that should be appended to this word when printed.
-   * Mostly, these are Unicode BiDi control characters.
-   */
-  void AppendSuffixMarks(std::string *text) const;
-
-  /** Appends the current word in reading order to the given buffer.*/
-  void AppendUTF8WordText(std::string *text) const;
-
-  /**
-   * Appends the text of the current text line, *assuming this iterator is
-   * positioned at the beginning of the text line*  This function
-   * updates the iterator to point to the first position past the text line.
-   * Each textline is terminated in a single newline character.
-   * If the textline ends a paragraph, it gets a second terminal newline.
-   */
-  void IterateAndAppendUTF8TextlineText(std::string *text);
-
-  /**
-   * Appends the text of the current paragraph in reading order
-   * to the given buffer.
-   * Each textline is terminated in a single newline character, and the
-   * paragraph gets an extra newline at the end.
-   */
-  void AppendUTF8ParagraphText(std::string *text) const;
-
-  /** Returns whether the bidi_debug flag is set to at least min_level. */
-  bool BidiDebug(int min_level) const;
-
-  bool current_paragraph_is_ltr_;
-
-  /**
-   * Is the currently pointed-at character at the beginning of
-   * a minor-direction run?
-   */
-  bool at_beginning_of_minor_run_;
-
-  /** Is the currently pointed-at character in a minor-direction sequence? */
-  bool in_minor_direction_;
-
-  /**
-   * Should detected inter-word spaces be preserved, or "compressed" to a single
-   * space character (default behavior).
-   */
-  bool preserve_interword_spaces_;
-};
-
-} // namespace tesseract.
-
-#endif // TESSERACT_CCMAIN_RESULT_ITERATOR_H_
--- a/third_party/ocr/tesseract-ocr/uos/mips64/include/tesseract/unichar.h
+++ b/third_party/ocr/tesseract-ocr/uos/mips64/include/tesseract/unichar.h
@ -1,174 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// File:        unichar.h
-// Description: Unicode character/ligature class.
-// Author:      Ray Smith
-//
-// (C) Copyright 2006, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef TESSERACT_CCUTIL_UNICHAR_H_
-#define TESSERACT_CCUTIL_UNICHAR_H_
-
-#include "export.h"
-
-#include <memory.h>
-#include <cstring>
-#include <string>
-#include <vector>
-
-namespace tesseract {
-
-// Maximum number of characters that can be stored in a UNICHAR. Must be
-// at least 4. Must not exceed 31 without changing the coding of length.
-#define UNICHAR_LEN 30
-
-// A UNICHAR_ID is the unique id of a unichar.
-using UNICHAR_ID = int;
-
-// A variable to indicate an invalid or uninitialized unichar id.
-static const int INVALID_UNICHAR_ID = -1;
-// A special unichar that corresponds to INVALID_UNICHAR_ID.
-static const char INVALID_UNICHAR[] = "__INVALID_UNICHAR__";
-
-enum StrongScriptDirection {
-  DIR_NEUTRAL = 0,       // Text contains only neutral characters.
-  DIR_LEFT_TO_RIGHT = 1, // Text contains no Right-to-Left characters.
-  DIR_RIGHT_TO_LEFT = 2, // Text contains no Left-to-Right characters.
-  DIR_MIX = 3,           // Text contains a mixture of left-to-right
-                         // and right-to-left characters.
-};
-
-using char32 = signed int;
-
-// The UNICHAR class holds a single classification result. This may be
-// a single Unicode character (stored as between 1 and 4 utf8 bytes) or
-// multiple Unicode characters representing the NFKC expansion of a ligature
-// such as fi, ffl etc. These are also stored as utf8.
-class TESS_API UNICHAR {
-public:
-  UNICHAR() {
-    memset(chars, 0, UNICHAR_LEN);
-  }
-
-  // Construct from a utf8 string. If len<0 then the string is null terminated.
-  // If the string is too long to fit in the UNICHAR then it takes only what
-  // will fit.
-  UNICHAR(const char *utf8_str, int len);
-
-  // Construct from a single UCS4 character.
-  explicit UNICHAR(int unicode);
-
-  // Default copy constructor and operator= are OK.
-
-  // Get the first character as UCS-4.
-  int first_uni() const;
-
-  // Get the length of the UTF8 string.
-  int utf8_len() const {
-    int len = chars[UNICHAR_LEN - 1];
-    return len >= 0 && len < UNICHAR_LEN ? len : UNICHAR_LEN;
-  }
-
-  // Get a UTF8 string, but NOT nullptr terminated.
-  const char *utf8() const {
-    return chars;
-  }
-
-  // Get a terminated UTF8 string: Must delete[] it after use.
-  char *utf8_str() const;
-
-  // Get the number of bytes in the first character of the given utf8 string.
-  static int utf8_step(const char *utf8_str);
-
-  // A class to simplify iterating over and accessing elements of a UTF8
-  // string. Note that unlike the UNICHAR class, const_iterator does NOT COPY or
-  // take ownership of the underlying byte array. It also does not permit
-  // modification of the array (as the name suggests).
-  //
-  // Example:
-  //   for (UNICHAR::const_iterator it = UNICHAR::begin(str, str_len);
-  //        it != UNICHAR::end(str, len);
-  //        ++it) {
-  //     printf("UCS-4 symbol code = %d\n", *it);
-  //     char buf[5];
-  //     int char_len = it.get_utf8(buf); buf[char_len] = '\0';
-  //     printf("Char = %s\n", buf);
-  //   }
-  class TESS_API const_iterator {
-    using CI = const_iterator;
-
-  public:
-    // Step to the next UTF8 character.
-    // If the current position is at an illegal UTF8 character, then print an
-    // error message and step by one byte. If the current position is at a
-    // nullptr value, don't step past it.
-    const_iterator &operator++();
-
-    // Return the UCS-4 value at the current position.
-    // If the current position is at an illegal UTF8 value, return a single
-    // space character.
-    int operator*() const;
-
-    // Store the UTF-8 encoding of the current codepoint into buf, which must be
-    // at least 4 bytes long. Return the number of bytes written.
-    // If the current position is at an illegal UTF8 value, writes a single
-    // space character and returns 1.
-    // Note that this method does not null-terminate the buffer.
-    int get_utf8(char *buf) const;
-    // Returns the number of bytes of the current codepoint. Returns 1 if the
-    // current position is at an illegal UTF8 value.
-    int utf8_len() const;
-    // Returns true if the UTF-8 encoding at the current position is legal.
-    bool is_legal() const;
-
-    // Return the pointer into the string at the current position.
-    const char *utf8_data() const {
-      return it_;
-    }
-
-    // Iterator equality operators.
-    friend bool operator==(const CI &lhs, const CI &rhs) {
-      return lhs.it_ == rhs.it_;
-    }
-    friend bool operator!=(const CI &lhs, const CI &rhs) {
-      return !(lhs == rhs);
-    }
-
-  private:
-    friend class UNICHAR;
-    explicit const_iterator(const char *it) : it_(it) {}
-
-    const char *it_; // Pointer into the string.
-  };
-
-  // Create a start/end iterator pointing to a string. Note that these methods
-  // are static and do NOT create a copy or take ownership of the underlying
-  // array.
-  static const_iterator begin(const char *utf8_str, int byte_length);
-  static const_iterator end(const char *utf8_str, int byte_length);
-
-  // Converts a utf-8 string to a vector of unicodes.
-  // Returns an empty vector if the input contains invalid UTF-8.
-  static std::vector<char32> UTF8ToUTF32(const char *utf8_str);
-  // Converts a vector of unicodes to a utf8 string.
-  // Returns an empty string if the input contains an invalid unicode.
-  static std::string UTF32ToUTF8(const std::vector<char32> &str32);
-
-private:
-  // A UTF-8 representation of 1 or more Unicode characters.
-  // The last element (chars[UNICHAR_LEN - 1]) is a length if
-  // its value < UNICHAR_LEN, otherwise it is a genuine character.
-  char chars[UNICHAR_LEN]{};
-};
-
-} // namespace tesseract
-
-#endif // TESSERACT_CCUTIL_UNICHAR_H_
--- a/third_party/ocr/tesseract-ocr/uos/mips64/include/tesseract/version.h
+++ b/third_party/ocr/tesseract-ocr/uos/mips64/include/tesseract/version.h
@ -1,34 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// File:        version.h
-// Description: Version information
-//
-// (C) Copyright 2018, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef TESSERACT_API_VERSION_H_
-#define TESSERACT_API_VERSION_H_
-
-// clang-format off
-
-#define TESSERACT_MAJOR_VERSION @GENERIC_MAJOR_VERSION@
-#define TESSERACT_MINOR_VERSION @GENERIC_MINOR_VERSION@
-#define TESSERACT_MICRO_VERSION @GENERIC_MICRO_VERSION@
-
-#define TESSERACT_VERSION          \
-  (TESSERACT_MAJOR_VERSION << 16 | \
-   TESSERACT_MINOR_VERSION <<  8 | \
-   TESSERACT_MICRO_VERSION)
-
-#define TESSERACT_VERSION_STR "@PACKAGE_VERSION@"
-
-// clang-format on
-
-#endif // TESSERACT_API_VERSION_H_